74 files changed, 1856 insertions, 586 deletions
diff --git a/.ci/scripts/.gitkeep b/.ci/scripts/.gitkeep deleted file mode 100644 index e69de29bb2..0000000000 --- a/.ci/scripts/.gitkeep +++ /dev/null diff --git a/.ci/scripts/common/post-upload.sh b/.ci/scripts/common/post-upload.sh new file mode 100644 index 0000000000..bb4e9d328c --- /dev/null +++ b/.ci/scripts/common/post-upload.sh @@ -0,0 +1,15 @@ +#!/bin/bash -ex + +# Copy documentation +cp license.txt "$REV_NAME" +cp README.md "$REV_NAME" + +tar $COMPRESSION_FLAGS "$ARCHIVE_NAME" "$REV_NAME" + +mv "$REV_NAME" $RELEASE_NAME + +7z a "$REV_NAME.7z" $RELEASE_NAME + +# move the compiled archive into the artifacts directory to be uploaded by travis releases +mv "$ARCHIVE_NAME" artifacts/ +mv "$REV_NAME.7z" artifacts/ diff --git a/.ci/scripts/common/pre-upload.sh b/.ci/scripts/common/pre-upload.sh new file mode 100644 index 0000000000..3c2fc79a2c --- /dev/null +++ b/.ci/scripts/common/pre-upload.sh @@ -0,0 +1,6 @@ +#!/bin/bash -ex + +GITDATE="`git show -s --date=short --format='%ad' | sed 's/-//g'`" +GITREV="`git show -s --format='%h'`" + +mkdir -p artifacts diff --git a/.ci/scripts/format/docker.sh b/.ci/scripts/format/docker.sh new file mode 100644 index 0000000000..778411e4a4 --- /dev/null +++ b/.ci/scripts/format/docker.sh @@ -0,0 +1,6 @@ +#!/bin/bash -ex + +# Run clang-format +cd /yuzu +chmod a+x ./.ci/scripts/format/script.sh +./.ci/scripts/format/script.sh diff --git a/.ci/scripts/format/exec.sh b/.ci/scripts/format/exec.sh new file mode 100644 index 0000000000..5d6393b384 --- /dev/null +++ b/.ci/scripts/format/exec.sh @@ -0,0 +1,4 @@ +#!/bin/bash -ex + +chmod a+x ./.ci/scripts/format/docker.sh +docker run -v $(pwd):/yuzu yuzuemu/build-environments:linux-clang-format /bin/bash -ex /yuzu/.ci/scripts/format/docker.sh diff --git a/.ci/scripts/format/script.sh b/.ci/scripts/format/script.sh new file mode 100644 index 0000000000..5ab828d5e4 --- /dev/null +++ b/.ci/scripts/format/script.sh @@ -0,0 +1,37 @@ +#!/bin/bash -ex + +if grep -nrI '\s$' src *.yml *.txt *.md Doxyfile .gitignore .gitmodules .ci* dist/*.desktop \ + dist/*.svg dist/*.xml; then + echo Trailing whitespace found, aborting + exit 1 +fi + +# Default clang-format points to default 3.5 version one +CLANG_FORMAT=clang-format-6.0 +$CLANG_FORMAT --version + +if [ "$TRAVIS_EVENT_TYPE" = "pull_request" ]; then + # Get list of every file modified in this pull request + files_to_lint="$(git diff --name-only --diff-filter=ACMRTUXB $TRAVIS_COMMIT_RANGE | grep '^src/[^.]*[.]\(cpp\|h\)$' || true)" +else + # Check everything for branch pushes + files_to_lint="$(find src/ -name '*.cpp' -or -name '*.h')" +fi + +# Turn off tracing for this because it's too verbose +set +x + +for f in $files_to_lint; do + d=$(diff -u "$f" <($CLANG_FORMAT "$f") || true) + if ! [ -z "$d" ]; then + echo "!!! $f not compliant to coding style, here is the fix:" + echo "$d" + fail=1 + fi +done + +set -x + +if [ "$fail" = 1 ]; then + exit 1 +fi diff --git a/.ci/scripts/linux/docker.sh b/.ci/scripts/linux/docker.sh new file mode 100644 index 0000000000..f538a40817 --- /dev/null +++ b/.ci/scripts/linux/docker.sh @@ -0,0 +1,14 @@ +#!/bin/bash -ex + +cd /yuzu + +ccache -s + +mkdir build || true && cd build +cmake .. 
-G Ninja -DYUZU_USE_BUNDLED_UNICORN=ON -DYUZU_USE_QT_WEB_ENGINE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=/usr/lib/ccache/gcc -DCMAKE_CXX_COMPILER=/usr/lib/ccache/g++ -DYUZU_ENABLE_COMPATIBILITY_REPORTING=${ENABLE_COMPATIBILITY_REPORTING:-"OFF"} -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DUSE_DISCORD_PRESENCE=ON + +ninja + +ccache -s + +ctest -VV -C Release diff --git a/.ci/scripts/linux/exec.sh b/.ci/scripts/linux/exec.sh new file mode 100644 index 0000000000..a5a6c34b9a --- /dev/null +++ b/.ci/scripts/linux/exec.sh @@ -0,0 +1,5 @@ +#!/bin/bash -ex + +mkdir -p "ccache" || true +chmod a+x ./.ci/scripts/linux/docker.sh +docker run -e ENABLE_COMPATIBILITY_REPORTING -e CCACHE_DIR=/yuzu/ccache -v $(pwd):/yuzu yuzuemu/build-environments:linux-fresh /bin/bash /yuzu/.ci/scripts/linux/docker.sh diff --git a/.ci/scripts/linux/upload.sh b/.ci/scripts/linux/upload.sh new file mode 100644 index 0000000000..0d131d1dde --- /dev/null +++ b/.ci/scripts/linux/upload.sh @@ -0,0 +1,14 @@ +#!/bin/bash -ex + +. .ci/scripts/common/pre-upload.sh + +REV_NAME="yuzu-linux-${GITDATE}-${GITREV}" +ARCHIVE_NAME="${REV_NAME}.tar.xz" +COMPRESSION_FLAGS="-cJvf" + +mkdir "$REV_NAME" + +cp build/bin/yuzu-cmd "$REV_NAME" +cp build/bin/yuzu "$REV_NAME" + +. .ci/scripts/common/post-upload.sh diff --git a/.ci/scripts/merge/apply-patches-by-label.py b/.ci/scripts/merge/apply-patches-by-label.py new file mode 100644 index 0000000000..b346001a52 --- /dev/null +++ b/.ci/scripts/merge/apply-patches-by-label.py @@ -0,0 +1,28 @@ +# Download all pull requests as patches that match a specific label +# Usage: python download-patches-by-label.py <Label to Match> <Root Path Folder to DL to> + +import requests, sys, json, urllib3.request, shutil, subprocess + +http = urllib3.PoolManager() +dl_list = {} + +def check_individual(labels): + for label in labels: + if (label["name"] == sys.argv[1]): + return True + return False + +try: + url = 'https://api.github.com/repos/yuzu-emu/yuzu/pulls' + response = requests.get(url) + if (response.ok): + j = json.loads(response.content) + for pr in j: + if (check_individual(pr["labels"])): + pn = pr["number"] + print("Matched PR# %s" % pn) + print(subprocess.check_output(["git", "fetch", "https://github.com/yuzu-emu/yuzu.git", "pull/%s/head:pr-%s" % (pn, pn), "-f"])) + print(subprocess.check_output(["git", "merge", "--squash", "pr-%s" % pn])) + print(subprocess.check_output(["git", "commit", "-m\"Merge PR %s\"" % pn])) +except: + sys.exit(-1) diff --git a/.ci/scripts/merge/check-label-presence.py b/.ci/scripts/merge/check-label-presence.py new file mode 100644 index 0000000000..048466d7e5 --- /dev/null +++ b/.ci/scripts/merge/check-label-presence.py @@ -0,0 +1,18 @@ +# Checks to see if the specified pull request # has the specified tag +# Usage: python check-label-presence.py <Pull Request ID> <Name of Label> + +import requests, json, sys + +try: + url = 'https://api.github.com/repos/yuzu-emu/yuzu/issues/%s' % sys.argv[1] + response = requests.get(url) + if (response.ok): + j = json.loads(response.content) + for label in j["labels"]: + if label["name"] == sys.argv[2]: + print('##vso[task.setvariable variable=enabletesting;]true') + sys.exit() +except: + sys.exit(-1) + +print('##vso[task.setvariable variable=enabletesting;]false') diff --git a/.ci/scripts/merge/yuzubot-git-config.sh b/.ci/scripts/merge/yuzubot-git-config.sh new file mode 100644 index 0000000000..d9d595bbc8 --- /dev/null +++ b/.ci/scripts/merge/yuzubot-git-config.sh @@ -0,0 +1,2 @@ +git config --global user.email "yuzu@yuzu-emu.org" 
+git config --global user.name "yuzubot"
\ No newline at end of file diff --git a/.ci/scripts/windows/docker.sh b/.ci/scripts/windows/docker.sh new file mode 100644 index 0000000000..f7093363bc --- /dev/null +++ b/.ci/scripts/windows/docker.sh @@ -0,0 +1,50 @@ +#!/bin/bash -ex + +cd /yuzu + +ccache -s + +# Dirty hack to trick unicorn makefile into believing we are in a MINGW system +mv /bin/uname /bin/uname1 && echo -e '#!/bin/sh\necho MINGW64' >> /bin/uname +chmod +x /bin/uname + +# Dirty hack to trick unicorn makefile into believing we have cmd +echo '' >> /bin/cmd +chmod +x /bin/cmd + +mkdir build || true && cd build +cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE="$(pwd)/../CMakeModules/MinGWCross.cmake" -DUSE_CCACHE=ON -DYUZU_USE_BUNDLED_UNICORN=ON -DENABLE_COMPATIBILITY_LIST_DOWNLOAD=ON -DCMAKE_BUILD_TYPE=Release +ninja + +# Clean up the dirty hacks +rm /bin/uname && mv /bin/uname1 /bin/uname +rm /bin/cmd + +ccache -s + +echo "Tests skipped" +#ctest -VV -C Release + +echo 'Prepare binaries...' +cd .. +mkdir package + +QT_PLATFORM_DLL_PATH='/usr/x86_64-w64-mingw32/lib/qt5/plugins/platforms/' +find build/ -name "yuzu*.exe" -exec cp {} 'package' \; + +# copy Qt plugins +mkdir package/platforms +cp "${QT_PLATFORM_DLL_PATH}/qwindows.dll" package/platforms/ +cp -rv "${QT_PLATFORM_DLL_PATH}/../mediaservice/" package/ +cp -rv "${QT_PLATFORM_DLL_PATH}/../imageformats/" package/ +rm -f package/mediaservice/*d.dll + +for i in package/*.exe; do + # we need to process pdb here, however, cv2pdb + # does not work here, so we just simply strip all the debug symbols + x86_64-w64-mingw32-strip "${i}" +done + +pip3 install pefile +python3 .ci/scripts/windows/scan_dll.py package/*.exe "package/" +python3 .ci/scripts/windows/scan_dll.py package/imageformats/*.dll "package/" diff --git a/.ci/scripts/windows/exec.sh b/.ci/scripts/windows/exec.sh new file mode 100644 index 0000000000..d6a994856c --- /dev/null +++ b/.ci/scripts/windows/exec.sh @@ -0,0 +1,5 @@ +#!/bin/bash -ex + +mkdir -p "ccache" || true +chmod a+x ./.ci/scripts/windows/docker.sh +docker run -e CCACHE_DIR=/yuzu/ccache -v $(pwd):/yuzu yuzuemu/build-environments:linux-mingw /bin/bash -ex /yuzu/.ci/scripts/windows/docker.sh diff --git a/.ci/scripts/windows/scan_dll.py b/.ci/scripts/windows/scan_dll.py new file mode 100644 index 0000000000..163183f2e3 --- /dev/null +++ b/.ci/scripts/windows/scan_dll.py @@ -0,0 +1,106 @@ +import pefile +import sys +import re +import os +import queue +import shutil + +# constant definitions +KNOWN_SYS_DLLS = ['WINMM.DLL', 'MSVCRT.DLL', 'VERSION.DLL', 'MPR.DLL', + 'DWMAPI.DLL', 'UXTHEME.DLL', 'DNSAPI.DLL', 'IPHLPAPI.DLL'] +# below is for Ubuntu 18.04 with specified PPA enabled, if you are using +# other distro or different repositories, change the following accordingly +DLL_PATH = [ + '/usr/x86_64-w64-mingw32/bin/', + '/usr/x86_64-w64-mingw32/lib/', + '/usr/lib/gcc/x86_64-w64-mingw32/7.3-posix/' +] + +missing = [] + + +def parse_imports(file_name): + results = [] + pe = pefile.PE(file_name, fast_load=True) + pe.parse_data_directories() + + for entry in pe.DIRECTORY_ENTRY_IMPORT: + current = entry.dll.decode() + current_u = current.upper() # b/c Windows is often case insensitive + # here we filter out system dlls + # dll w/ names like *32.dll are likely to be system dlls + if current_u.upper() not in KNOWN_SYS_DLLS and not re.match(string=current_u, pattern=r'.*32\.DLL'): + results.append(current) + + return results + + +def parse_imports_recursive(file_name, path_list=[]): + q = queue.Queue() # create a FIFO queue + # file_name can be a string or a list for the 
convience + if isinstance(file_name, str): + q.put(file_name) + elif isinstance(file_name, list): + for i in file_name: + q.put(i) + full_list = [] + while q.qsize(): + current = q.get_nowait() + print('> %s' % current) + deps = parse_imports(current) + # if this dll does not have any import, ignore it + if not deps: + continue + for dep in deps: + # the dependency already included in the list, skip + if dep in full_list: + continue + # find the requested dll in the provided paths + full_path = find_dll(dep) + if not full_path: + missing.append(dep) + continue + full_list.append(dep) + q.put(full_path) + path_list.append(full_path) + return full_list + + +def find_dll(name): + for path in DLL_PATH: + for root, _, files in os.walk(path): + for f in files: + if name.lower() == f.lower(): + return os.path.join(root, f) + + +def deploy(name, dst, dry_run=False): + dlls_path = [] + parse_imports_recursive(name, dlls_path) + for dll_entry in dlls_path: + if not dry_run: + shutil.copy(dll_entry, dst) + else: + print('[Dry-Run] Copy %s to %s' % (dll_entry, dst)) + print('Deploy completed.') + return dlls_path + + +def main(): + if len(sys.argv) < 3: + print('Usage: %s [files to examine ...] [target deploy directory]') + return 1 + to_deploy = sys.argv[1:-1] + tgt_dir = sys.argv[-1] + if not os.path.isdir(tgt_dir): + print('%s is not a directory.' % tgt_dir) + return 1 + print('Scanning dependencies...') + deploy(to_deploy, tgt_dir) + if missing: + print('Following DLLs are not found: %s' % ('\n'.join(missing))) + return 0 + + +if __name__ == '__main__': + main() diff --git a/.ci/scripts/windows/upload.sh b/.ci/scripts/windows/upload.sh new file mode 100644 index 0000000000..de73d3541a --- /dev/null +++ b/.ci/scripts/windows/upload.sh @@ -0,0 +1,13 @@ +#!/bin/bash -ex + +. .ci/scripts/common/pre-upload.sh + +REV_NAME="yuzu-windows-mingw-${GITDATE}-${GITREV}" +ARCHIVE_NAME="${REV_NAME}.tar.gz" +COMPRESSION_FLAGS="-czvf" + +mkdir "$REV_NAME" +# get around the permission issues +cp -r package/* "$REV_NAME" + +. 
.ci/scripts/common/post-upload.sh diff --git a/.ci/templates/build-single.yml b/.ci/templates/build-single.yml new file mode 100644 index 0000000000..77eeb96b5b --- /dev/null +++ b/.ci/templates/build-single.yml @@ -0,0 +1,21 @@ +parameters: + artifactSource: 'true' + +steps: +- task: DockerInstaller@0 + displayName: 'Prepare Environment' + inputs: + dockerVersion: '17.09.0-ce' +- task: CacheBeta@0 + displayName: 'Cache Build System' + inputs: + key: yuzu-v1-$(BuildName)-$(BuildSuffix)-$(CacheSuffix) + path: $(System.DefaultWorkingDirectory)/ccache + cacheHitVar: CACHE_RESTORED +- script: chmod a+x ./.ci/scripts/$(ScriptFolder)/exec.sh && ./.ci/scripts/$(ScriptFolder)/exec.sh + displayName: 'Build' +- script: chmod a+x ./.ci/scripts/$(ScriptFolder)/upload.sh && ./.ci/scripts/$(ScriptFolder)/upload.sh + displayName: 'Package Artifacts' +- publish: artifacts + artifact: 'yuzu-$(BuildName)-$(BuildSuffix)' + displayName: 'Upload Artifacts' diff --git a/.ci/templates/build-standard.yml b/.ci/templates/build-standard.yml new file mode 100644 index 0000000000..9975f5c49f --- /dev/null +++ b/.ci/templates/build-standard.yml @@ -0,0 +1,22 @@ +jobs: +- job: build + displayName: 'standard' + pool: + vmImage: ubuntu-latest + strategy: + maxParallel: 10 + matrix: + windows: + BuildSuffix: 'windows-mingw' + ScriptFolder: 'windows' + linux: + BuildSuffix: 'linux' + ScriptFolder: 'linux' + steps: + - template: ./sync-source.yml + parameters: + artifactSource: $(parameters.artifactSource) + needSubmodules: 'true' + - template: ./build-single.yml + parameters: + artifactSource: 'false'
\ No newline at end of file diff --git a/.ci/templates/build-testing.yml b/.ci/templates/build-testing.yml new file mode 100644 index 0000000000..101e529963 --- /dev/null +++ b/.ci/templates/build-testing.yml @@ -0,0 +1,30 @@ +jobs: +- job: build_test + displayName: 'testing' + pool: + vmImage: ubuntu-latest + strategy: + maxParallel: 10 + matrix: + windows: + BuildSuffix: 'windows-testing' + ScriptFolder: 'windows' + steps: + - task: PythonScript@0 + condition: eq(variables['Build.Reason'], 'PullRequest') + displayName: 'Determine Testing Status' + inputs: + scriptSource: 'filePath' + scriptPath: '../scripts/merge/check-label-presence.py' + arguments: '$(System.PullRequest.PullRequestNumber) create-testing-build' + - ${{ if eq(variables.enabletesting, 'true') }}: + - template: ./sync-source.yml + parameters: + artifactSource: $(parameters.artifactSource) + needSubmodules: 'true' + - template: ./mergebot.yml + parameters: + matchLabel: 'testing-merge' + - template: ./build-single.yml + parameters: + artifactSource: 'false'
\ No newline at end of file diff --git a/.ci/templates/format-check.yml b/.ci/templates/format-check.yml new file mode 100644 index 0000000000..5061f1cb8e --- /dev/null +++ b/.ci/templates/format-check.yml @@ -0,0 +1,14 @@ +parameters: + artifactSource: 'true' + +steps: +- template: ./sync-source.yml + parameters: + artifactSource: $(parameters.artifactSource) + needSubmodules: 'false' +- task: DockerInstaller@0 + displayName: 'Prepare Environment' + inputs: + dockerVersion: '17.09.0-ce' +- script: chmod a+x ./.ci/scripts/format/exec.sh && ./.ci/scripts/format/exec.sh + displayName: 'Verify Formatting' diff --git a/.ci/templates/merge.yml b/.ci/templates/merge.yml new file mode 100644 index 0000000000..efc82778ae --- /dev/null +++ b/.ci/templates/merge.yml @@ -0,0 +1,46 @@ +jobs: +- job: merge + displayName: 'pull requests' + steps: + - checkout: self + submodules: recursive + - template: ./mergebot.yml + parameters: + matchLabel: '$(BuildName)-merge' + - task: ArchiveFiles@2 + displayName: 'Package Source' + inputs: + rootFolderOrFile: '$(System.DefaultWorkingDirectory)' + includeRootFolder: false + archiveType: '7z' + archiveFile: '$(Build.ArtifactStagingDirectory)/yuzu-$(BuildName)-source.7z' + - task: PublishPipelineArtifact@1 + displayName: 'Upload Artifacts' + inputs: + targetPath: '$(Build.ArtifactStagingDirectory)/yuzu-$(BuildName)-source.7z' + artifact: 'yuzu-$(BuildName)-source' + replaceExistingArchive: true +- job: upload_source + displayName: 'upload' + dependsOn: merge + steps: + - template: ./sync-source.yml + parameters: + artifactSource: 'true' + needSubmodules: 'true' + - script: chmod a+x $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh && $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh + displayName: 'Apply Git Configuration' + - script: git tag -a $(BuildName)-$(Build.BuildId) -m "yuzu $(BuildName) $(Build.BuildNumber) $(Build.DefinitionName)" + displayName: 'Tag Source' + - script: git remote add other $(GitRepoPushChangesURL) + displayName: 'Register Repository' + - script: git push --follow-tags --force other HEAD:$(GitPushBranch) + displayName: 'Update Code' + - script: git rev-list -n 1 $(BuildName)-$(Build.BuildId) > $(Build.ArtifactStagingDirectory)/tag-commit.sha + displayName: 'Calculate Release Point' + - task: PublishPipelineArtifact@1 + displayName: 'Upload Release Point' + inputs: + targetPath: '$(Build.ArtifactStagingDirectory)/tag-commit.sha' + artifact: 'yuzu-$(BuildName)-release-point' + replaceExistingArchive: true
\ No newline at end of file diff --git a/.ci/templates/mergebot.yml b/.ci/templates/mergebot.yml new file mode 100644 index 0000000000..5211efcc6d --- /dev/null +++ b/.ci/templates/mergebot.yml @@ -0,0 +1,15 @@ +parameters: + matchLabel: 'dummy-merge' + +steps: + - script: mkdir $(System.DefaultWorkingDirectory)/patches && pip install requests urllib3 + displayName: 'Prepare Environment' + - script: chmod a+x $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh && $(System.DefaultWorkingDirectory)/.ci/scripts/merge/yuzubot-git-config.sh + displayName: 'Apply Git Configuration' + - task: PythonScript@0 + displayName: 'Discover, Download, and Apply Patches' + inputs: + scriptSource: 'filePath' + scriptPath: '.ci/scripts/merge/apply-patches-by-label.py' + arguments: '${{ parameters.matchLabel }} patches' + workingDirectory: '$(System.DefaultWorkingDirectory)' diff --git a/.ci/templates/release.yml b/.ci/templates/release.yml new file mode 100644 index 0000000000..60bebd2aad --- /dev/null +++ b/.ci/templates/release.yml @@ -0,0 +1,29 @@ +steps: + - task: DownloadPipelineArtifact@2 + displayName: 'Download Windows Release' + inputs: + artifactName: 'yuzu-$(BuildName)-windows-mingw' + buildType: 'current' + targetPath: '$(Build.ArtifactStagingDirectory)' + - task: DownloadPipelineArtifact@2 + displayName: 'Download Linux Release' + inputs: + artifactName: 'yuzu-$(BuildName)-linux' + buildType: 'current' + targetPath: '$(Build.ArtifactStagingDirectory)' + - task: DownloadPipelineArtifact@2 + displayName: 'Download Release Point' + inputs: + artifactName: 'yuzu-$(BuildName)-release-point' + buildType: 'current' + targetPath: '$(Build.ArtifactStagingDirectory)' + - script: echo '##vso[task.setvariable variable=tagcommit]' && cat $(Build.ArtifactStagingDirectory)/tag-commit.sha + displayName: 'Calculate Release Point' + - task: GitHubRelease@0 + inputs: + gitHubConnection: $(GitHubReleaseConnectionName) + repositoryName: '$(GitHubReleaseRepoName)' + action: 'create' + target: $(variables.tagcommit) + title: 'yuzu $(BuildName) #$(Build.BuildId)' + assets: '$(Build.ArtifactStagingDirectory)/*' diff --git a/.ci/templates/retrieve-artifact-source.yml b/.ci/templates/retrieve-artifact-source.yml new file mode 100644 index 0000000000..47d217e7bd --- /dev/null +++ b/.ci/templates/retrieve-artifact-source.yml @@ -0,0 +1,16 @@ +steps: +- checkout: none +- task: DownloadPipelineArtifact@2 + displayName: 'Download Source' + inputs: + artifactName: 'yuzu-$(BuildName)-source' + buildType: 'current' + targetPath: '$(Build.ArtifactStagingDirectory)' +- script: rm -rf $(System.DefaultWorkingDirectory) && mkdir $(System.DefaultWorkingDirectory) + displayName: 'Clean Working Directory' +- task: ExtractFiles@1 + displayName: 'Prepare Source' + inputs: + archiveFilePatterns: '$(Build.ArtifactStagingDirectory)/*.7z' + destinationFolder: '$(System.DefaultWorkingDirectory)' + cleanDestinationFolder: false
\ No newline at end of file diff --git a/.ci/templates/retrieve-master-source.yml b/.ci/templates/retrieve-master-source.yml new file mode 100644 index 0000000000..a08a3f926f --- /dev/null +++ b/.ci/templates/retrieve-master-source.yml @@ -0,0 +1,11 @@ +parameters: + needSubmodules: 'true' + +steps: +- checkout: self + displayName: 'Checkout Recursive' + submodules: recursive +# condition: eq(parameters.needSubmodules, 'true') +#- checkout: self +# displayName: 'Checkout Fast' +# condition: ne(parameters.needSubmodules, 'true') diff --git a/.ci/templates/sync-source.yml b/.ci/templates/sync-source.yml new file mode 100644 index 0000000000..409e1cd834 --- /dev/null +++ b/.ci/templates/sync-source.yml @@ -0,0 +1,7 @@ +steps: +- ${{ if eq(parameters.artifactSource, 'true') }}: + - template: ./retrieve-artifact-source.yml +- ${{ if ne(parameters.artifactSource, 'true') }}: + - template: ./retrieve-master-source.yml + parameters: + needSubmodules: $(parameters.needSubmodules)
\ No newline at end of file diff --git a/.ci/yuzu-mainline.yml b/.ci/yuzu-mainline.yml index aa912913de..164bcb165b 100644 --- a/.ci/yuzu-mainline.yml +++ b/.ci/yuzu-mainline.yml @@ -1,19 +1,23 @@ -# Starter pipeline -# Start with a minimal pipeline that you can customize to build and deploy your code. -# Add steps that build, run tests, deploy, and more: -# https://aka.ms/yaml - trigger: - master -pool: - vmImage: 'ubuntu-latest' - -steps: -- script: echo Hello, world! - displayName: 'Run a one-line script' - -- script: | - echo Add other tasks to build, test, and deploy your project. - echo See https://aka.ms/yaml - displayName: 'Run a multi-line script' +stages: +- stage: merge + displayName: 'merge' + jobs: + - template: ./templates/merge.yml +- stage: format + dependsOn: merge + displayName: 'format' + jobs: + - job: format + displayName: 'clang' + pool: + vmImage: ubuntu-latest + steps: + - template: ./templates/format-check.yml +- stage: build + displayName: 'build' + dependsOn: format + jobs: + - template: ./templates/build-standard.yml diff --git a/.ci/yuzu-verify.yml b/.ci/yuzu-verify.yml new file mode 100644 index 0000000000..d01c1feed0 --- /dev/null +++ b/.ci/yuzu-verify.yml @@ -0,0 +1,18 @@ +stages: +- stage: format + displayName: 'format' + jobs: + - job: format + displayName: 'clang' + pool: + vmImage: ubuntu-latest + steps: + - template: ./templates/format-check.yml + parameters: + artifactSource: 'false' +- stage: build + displayName: 'build' + dependsOn: format + jobs: + - template: ./templates/build-standard.yml + - template: ./templates/build-testing.yml
\ No newline at end of file diff --git a/.ci/yuzu.yml b/.ci/yuzu.yml deleted file mode 100644 index aa912913de..0000000000 --- a/.ci/yuzu.yml +++ /dev/null @@ -1,19 +0,0 @@ -# Starter pipeline -# Start with a minimal pipeline that you can customize to build and deploy your code. -# Add steps that build, run tests, deploy, and more: -# https://aka.ms/yaml - -trigger: -- master - -pool: - vmImage: 'ubuntu-latest' - -steps: -- script: echo Hello, world! - displayName: 'Run a one-line script' - -- script: | - echo Add other tasks to build, test, and deploy your project. - echo See https://aka.ms/yaml - displayName: 'Run a multi-line script' diff --git a/src/core/file_sys/program_metadata.cpp b/src/core/file_sys/program_metadata.cpp index eb76174c5b..7310b36026 100644 --- a/src/core/file_sys/program_metadata.cpp +++ b/src/core/file_sys/program_metadata.cpp @@ -94,6 +94,10 @@ u64 ProgramMetadata::GetFilesystemPermissions() const { return aci_file_access.permissions; } +u32 ProgramMetadata::GetSystemResourceSize() const { + return npdm_header.system_resource_size; +} + const ProgramMetadata::KernelCapabilityDescriptors& ProgramMetadata::GetKernelCapabilities() const { return aci_kernel_capabilities; } diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h index 43bf2820a9..88ec97d85f 100644 --- a/src/core/file_sys/program_metadata.h +++ b/src/core/file_sys/program_metadata.h @@ -58,6 +58,7 @@ public: u32 GetMainThreadStackSize() const; u64 GetTitleID() const; u64 GetFilesystemPermissions() const; + u32 GetSystemResourceSize() const; const KernelCapabilityDescriptors& GetKernelCapabilities() const; void Print() const; @@ -76,7 +77,8 @@ private: u8 reserved_3; u8 main_thread_priority; u8 main_thread_cpu; - std::array<u8, 8> reserved_4; + std::array<u8, 4> reserved_4; + u32_le system_resource_size; u32_le process_category; u32_le main_stack_size; std::array<u8, 0x10> application_name; diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index f45ef05f69..db3ab14cef 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -129,20 +129,17 @@ u64 Process::GetTotalPhysicalMemoryAvailable() const { return vm_manager.GetTotalPhysicalMemoryAvailable(); } -u64 Process::GetTotalPhysicalMemoryAvailableWithoutMmHeap() const { - // TODO: Subtract the personal heap size from this when the - // personal heap is implemented. - return GetTotalPhysicalMemoryAvailable(); +u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const { + return GetTotalPhysicalMemoryAvailable() - GetSystemResourceSize(); } u64 Process::GetTotalPhysicalMemoryUsed() const { - return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size; + return vm_manager.GetCurrentHeapSize() + main_thread_stack_size + code_memory_size + + GetSystemResourceUsage(); } -u64 Process::GetTotalPhysicalMemoryUsedWithoutMmHeap() const { - // TODO: Subtract the personal heap size from this when the - // personal heap is implemented. 
- return GetTotalPhysicalMemoryUsed(); +u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const { + return GetTotalPhysicalMemoryUsed() - GetSystemResourceUsage(); } void Process::RegisterThread(const Thread* thread) { @@ -172,6 +169,7 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) { program_id = metadata.GetTitleID(); ideal_core = metadata.GetMainThreadCore(); is_64bit_process = metadata.Is64BitProgram(); + system_resource_size = metadata.GetSystemResourceSize(); vm_manager.Reset(metadata.GetAddressSpaceType()); diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index 83ea02beec..3196014da3 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h @@ -168,8 +168,24 @@ public: return capabilities.GetPriorityMask(); } - u32 IsVirtualMemoryEnabled() const { - return is_virtual_address_memory_enabled; + /// Gets the amount of secure memory to allocate for memory management. + u32 GetSystemResourceSize() const { + return system_resource_size; + } + + /// Gets the amount of secure memory currently in use for memory management. + u32 GetSystemResourceUsage() const { + // On hardware, this returns the amount of system resource memory that has + // been used by the kernel. This is problematic for Yuzu to emulate, because + // system resource memory is used for page tables -- and yuzu doesn't really + // have a way to calculate how much memory is required for page tables for + // the current process at any given time. + // TODO: Is this even worth implementing? Games may retrieve this value via + // an SDK function that gets used + available system resource size for debug + // or diagnostic purposes. However, it seems unlikely that a game would make + // decisions based on how much system memory is dedicated to its page tables. + // Is returning a value other than zero wise? + return 0; } /// Whether this process is an AArch64 or AArch32 process. @@ -196,15 +212,15 @@ public: u64 GetTotalPhysicalMemoryAvailable() const; /// Retrieves the total physical memory available to this process in bytes, - /// without the size of the personal heap added to it. - u64 GetTotalPhysicalMemoryAvailableWithoutMmHeap() const; + /// without the size of the personal system resource heap added to it. + u64 GetTotalPhysicalMemoryAvailableWithoutSystemResource() const; /// Retrieves the total physical memory used by this process in bytes. u64 GetTotalPhysicalMemoryUsed() const; /// Retrieves the total physical memory used by this process in bytes, - /// without the size of the personal heap added to it. - u64 GetTotalPhysicalMemoryUsedWithoutMmHeap() const; + /// without the size of the personal system resource heap added to it. + u64 GetTotalPhysicalMemoryUsedWithoutSystemResource() const; /// Gets the list of all threads created with this process as their owner. const std::list<const Thread*>& GetThreadList() const { @@ -298,12 +314,16 @@ private: /// Title ID corresponding to the process u64 program_id = 0; + /// Specifies additional memory to be reserved for the process's memory management by the + /// system. When this is non-zero, secure memory is allocated and used for page table allocation + /// instead of using the normal global page tables/memory block management. + u32 system_resource_size = 0; + /// Resource limit descriptor for this process SharedPtr<ResourceLimit> resource_limit; /// The ideal CPU core for this process, threads are scheduled on this core by default. 
u8 ideal_core = 0; - u32 is_virtual_address_memory_enabled = 0; /// The Thread Local Storage area is allocated as processes create threads, /// each TLS area is 0x200 bytes, so one page (0x1000) is split up in 8 parts, and each part diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 58374f8295..a46eed3daa 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -736,16 +736,16 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha StackRegionBaseAddr = 14, StackRegionSize = 15, // 3.0.0+ - IsVirtualAddressMemoryEnabled = 16, - PersonalMmHeapUsage = 17, + SystemResourceSize = 16, + SystemResourceUsage = 17, TitleId = 18, // 4.0.0+ PrivilegedProcessId = 19, // 5.0.0+ UserExceptionContextAddr = 20, // 6.0.0+ - TotalPhysicalMemoryAvailableWithoutMmHeap = 21, - TotalPhysicalMemoryUsedWithoutMmHeap = 22, + TotalPhysicalMemoryAvailableWithoutSystemResource = 21, + TotalPhysicalMemoryUsedWithoutSystemResource = 22, }; const auto info_id_type = static_cast<GetInfoType>(info_id); @@ -763,12 +763,12 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha case GetInfoType::StackRegionSize: case GetInfoType::TotalPhysicalMemoryAvailable: case GetInfoType::TotalPhysicalMemoryUsed: - case GetInfoType::IsVirtualAddressMemoryEnabled: - case GetInfoType::PersonalMmHeapUsage: + case GetInfoType::SystemResourceSize: + case GetInfoType::SystemResourceUsage: case GetInfoType::TitleId: case GetInfoType::UserExceptionContextAddr: - case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap: - case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap: { + case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource: + case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource: { if (info_sub_id != 0) { return ERR_INVALID_ENUM_VALUE; } @@ -829,8 +829,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha *result = process->GetTotalPhysicalMemoryUsed(); return RESULT_SUCCESS; - case GetInfoType::IsVirtualAddressMemoryEnabled: - *result = process->IsVirtualMemoryEnabled(); + case GetInfoType::SystemResourceSize: + *result = process->GetSystemResourceSize(); + return RESULT_SUCCESS; + + case GetInfoType::SystemResourceUsage: + LOG_WARNING(Kernel_SVC, "(STUBBED) Attempted to query system resource usage"); + *result = process->GetSystemResourceUsage(); return RESULT_SUCCESS; case GetInfoType::TitleId: @@ -843,12 +848,12 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha *result = 0; return RESULT_SUCCESS; - case GetInfoType::TotalPhysicalMemoryAvailableWithoutMmHeap: - *result = process->GetTotalPhysicalMemoryAvailable(); + case GetInfoType::TotalPhysicalMemoryAvailableWithoutSystemResource: + *result = process->GetTotalPhysicalMemoryAvailableWithoutSystemResource(); return RESULT_SUCCESS; - case GetInfoType::TotalPhysicalMemoryUsedWithoutMmHeap: - *result = process->GetTotalPhysicalMemoryUsedWithoutMmHeap(); + case GetInfoType::TotalPhysicalMemoryUsedWithoutSystemResource: + *result = process->GetTotalPhysicalMemoryUsedWithoutSystemResource(); return RESULT_SUCCESS; default: @@ -953,6 +958,86 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha } } +/// Maps memory at a desired address +static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) { + LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size); + + if (!Common::Is4KBAligned(addr)) { + LOG_ERROR(Kernel_SVC, "Address 
is not aligned to 4KB, 0x{:016X}", addr); + return ERR_INVALID_ADDRESS; + } + + if (!Common::Is4KBAligned(size)) { + LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size); + return ERR_INVALID_SIZE; + } + + if (size == 0) { + LOG_ERROR(Kernel_SVC, "Size is zero"); + return ERR_INVALID_SIZE; + } + + if (!(addr < addr + size)) { + LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address"); + return ERR_INVALID_MEMORY_RANGE; + } + + Process* const current_process = system.Kernel().CurrentProcess(); + auto& vm_manager = current_process->VMManager(); + + if (current_process->GetSystemResourceSize() == 0) { + LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); + return ERR_INVALID_STATE; + } + + if (!vm_manager.IsWithinMapRegion(addr, size)) { + LOG_ERROR(Kernel_SVC, "Range not within map region"); + return ERR_INVALID_MEMORY_RANGE; + } + + return vm_manager.MapPhysicalMemory(addr, size); +} + +/// Unmaps memory previously mapped via MapPhysicalMemory +static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size) { + LOG_DEBUG(Kernel_SVC, "called, addr=0x{:016X}, size=0x{:X}", addr, size); + + if (!Common::Is4KBAligned(addr)) { + LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr); + return ERR_INVALID_ADDRESS; + } + + if (!Common::Is4KBAligned(size)) { + LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size); + return ERR_INVALID_SIZE; + } + + if (size == 0) { + LOG_ERROR(Kernel_SVC, "Size is zero"); + return ERR_INVALID_SIZE; + } + + if (!(addr < addr + size)) { + LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address"); + return ERR_INVALID_MEMORY_RANGE; + } + + Process* const current_process = system.Kernel().CurrentProcess(); + auto& vm_manager = current_process->VMManager(); + + if (current_process->GetSystemResourceSize() == 0) { + LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); + return ERR_INVALID_STATE; + } + + if (!vm_manager.IsWithinMapRegion(addr, size)) { + LOG_ERROR(Kernel_SVC, "Range not within map region"); + return ERR_INVALID_MEMORY_RANGE; + } + + return vm_manager.UnmapPhysicalMemory(addr, size); +} + /// Sets the thread activity static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 activity) { LOG_DEBUG(Kernel_SVC, "called, handle=0x{:08X}, activity=0x{:08X}", handle, activity); @@ -2310,8 +2395,8 @@ static const FunctionDef SVC_Table[] = { {0x29, SvcWrap<GetInfo>, "GetInfo"}, {0x2A, nullptr, "FlushEntireDataCache"}, {0x2B, nullptr, "FlushDataCache"}, - {0x2C, nullptr, "MapPhysicalMemory"}, - {0x2D, nullptr, "UnmapPhysicalMemory"}, + {0x2C, SvcWrap<MapPhysicalMemory>, "MapPhysicalMemory"}, + {0x2D, SvcWrap<UnmapPhysicalMemory>, "UnmapPhysicalMemory"}, {0x2E, nullptr, "GetFutureThreadInfo"}, {0x2F, nullptr, "GetLastThreadInfo"}, {0x30, SvcWrap<GetResourceLimitLimitValue>, "GetResourceLimitLimitValue"}, diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h index 865473c6fa..c2d8d0dc30 100644 --- a/src/core/hle/kernel/svc_wrap.h +++ b/src/core/hle/kernel/svc_wrap.h @@ -32,6 +32,11 @@ void SvcWrap(Core::System& system) { FuncReturn(system, func(system, Param(system, 0)).raw); } +template <ResultCode func(Core::System&, u64, u64)> +void SvcWrap(Core::System& system) { + FuncReturn(system, func(system, Param(system, 0), Param(system, 1)).raw); +} + template <ResultCode func(Core::System&, u32)> void SvcWrap(Core::System& system) { FuncReturn(system, func(system, static_cast<u32>(Param(system, 0))).raw); diff --git a/src/core/hle/kernel/vm_manager.cpp 
b/src/core/hle/kernel/vm_manager.cpp index 7bc925a5f7..4f45fb03b3 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp @@ -11,6 +11,8 @@ #include "core/core.h" #include "core/file_sys/program_metadata.h" #include "core/hle/kernel/errors.h" +#include "core/hle/kernel/process.h" +#include "core/hle/kernel/resource_limit.h" #include "core/hle/kernel/vm_manager.h" #include "core/memory.h" #include "core/memory_setup.h" @@ -48,10 +50,14 @@ bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { type != next.type) { return false; } - if (type == VMAType::AllocatedMemoryBlock && - (backing_block != next.backing_block || offset + size != next.offset)) { + if ((attribute & MemoryAttribute::DeviceMapped) == MemoryAttribute::DeviceMapped) { + // TODO: Can device mapped memory be merged sanely? + // Not merging it may cause inaccuracies versus hardware when memory layout is queried. return false; } + if (type == VMAType::AllocatedMemoryBlock) { + return true; + } if (type == VMAType::BackingMemory && backing_memory + size != next.backing_memory) { return false; } @@ -99,7 +105,7 @@ bool VMManager::IsValidHandle(VMAHandle handle) const { ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block, std::size_t offset, u64 size, - MemoryState state) { + MemoryState state, VMAPermission perm) { ASSERT(block != nullptr); ASSERT(offset + size <= block->size()); @@ -109,7 +115,7 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target, ASSERT(final_vma.size == size); final_vma.type = VMAType::AllocatedMemoryBlock; - final_vma.permissions = VMAPermission::ReadWrite; + final_vma.permissions = perm; final_vma.state = state; final_vma.backing_block = std::move(block); final_vma.offset = offset; @@ -288,6 +294,166 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) { return MakeResult<VAddr>(heap_region_base); } +ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) { + const auto end_addr = target + size; + const auto last_addr = end_addr - 1; + VAddr cur_addr = target; + + ResultCode result = RESULT_SUCCESS; + + // Check how much memory we've already mapped. + const auto mapped_size_result = SizeOfAllocatedVMAsInRange(target, size); + if (mapped_size_result.Failed()) { + return mapped_size_result.Code(); + } + + // If we've already mapped the desired amount, return early. + const std::size_t mapped_size = *mapped_size_result; + if (mapped_size == size) { + return RESULT_SUCCESS; + } + + // Check that we can map the memory we want. + const auto res_limit = system.CurrentProcess()->GetResourceLimit(); + const u64 physmem_remaining = res_limit->GetMaxResourceValue(ResourceType::PhysicalMemory) - + res_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory); + if (physmem_remaining < (size - mapped_size)) { + return ERR_RESOURCE_LIMIT_EXCEEDED; + } + + // Keep track of the memory regions we unmap. + std::vector<std::pair<u64, u64>> mapped_regions; + + // Iterate, trying to map memory. 
+ { + cur_addr = target; + + auto iter = FindVMA(target); + ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end"); + + while (true) { + const auto& vma = iter->second; + const auto vma_start = vma.base; + const auto vma_end = vma_start + vma.size; + const auto vma_last = vma_end - 1; + + // Map the memory block + const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr); + if (vma.state == MemoryState::Unmapped) { + const auto map_res = + MapMemoryBlock(cur_addr, std::make_shared<std::vector<u8>>(map_size, 0), 0, + map_size, MemoryState::Heap, VMAPermission::ReadWrite); + result = map_res.Code(); + if (result.IsError()) { + break; + } + + mapped_regions.emplace_back(cur_addr, map_size); + } + + // Break once we hit the end of the range. + if (last_addr <= vma_last) { + break; + } + + // Advance to the next block. + cur_addr = vma_end; + iter = FindVMA(cur_addr); + ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end"); + } + } + + // If we failed, unmap memory. + if (result.IsError()) { + for (const auto [unmap_address, unmap_size] : mapped_regions) { + ASSERT_MSG(UnmapRange(unmap_address, unmap_size).IsSuccess(), + "MapPhysicalMemory un-map on error"); + } + + return result; + } + + // Update amount of mapped physical memory. + physical_memory_mapped += size - mapped_size; + + return RESULT_SUCCESS; +} + +ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) { + const auto end_addr = target + size; + const auto last_addr = end_addr - 1; + VAddr cur_addr = target; + + ResultCode result = RESULT_SUCCESS; + + // Check how much memory is currently mapped. + const auto mapped_size_result = SizeOfUnmappablePhysicalMemoryInRange(target, size); + if (mapped_size_result.Failed()) { + return mapped_size_result.Code(); + } + + // If we've already unmapped all the memory, return early. + const std::size_t mapped_size = *mapped_size_result; + if (mapped_size == 0) { + return RESULT_SUCCESS; + } + + // Keep track of the memory regions we unmap. + std::vector<std::pair<u64, u64>> unmapped_regions; + + // Try to unmap regions. + { + cur_addr = target; + + auto iter = FindVMA(target); + ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end"); + + while (true) { + const auto& vma = iter->second; + const auto vma_start = vma.base; + const auto vma_end = vma_start + vma.size; + const auto vma_last = vma_end - 1; + + // Unmap the memory block + const auto unmap_size = std::min(end_addr - cur_addr, vma_end - cur_addr); + if (vma.state == MemoryState::Heap) { + result = UnmapRange(cur_addr, unmap_size); + if (result.IsError()) { + break; + } + + unmapped_regions.emplace_back(cur_addr, unmap_size); + } + + // Break once we hit the end of the range. + if (last_addr <= vma_last) { + break; + } + + // Advance to the next block. + cur_addr = vma_end; + iter = FindVMA(cur_addr); + ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end"); + } + } + + // If we failed, re-map regions. + // TODO: Preserve memory contents? 
+ if (result.IsError()) { + for (const auto [map_address, map_size] : unmapped_regions) { + const auto remap_res = + MapMemoryBlock(map_address, std::make_shared<std::vector<u8>>(map_size, 0), 0, + map_size, MemoryState::Heap, VMAPermission::None); + ASSERT_MSG(remap_res.Succeeded(), "UnmapPhysicalMemory re-map on error"); + } + } + + // Update mapped amount + physical_memory_mapped -= mapped_size; + + return RESULT_SUCCESS; +} + ResultCode VMManager::MapCodeMemory(VAddr dst_address, VAddr src_address, u64 size) { constexpr auto ignore_attribute = MemoryAttribute::LockedForIPC | MemoryAttribute::DeviceMapped; const auto src_check_result = CheckRangeState( @@ -435,7 +601,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem // Protect mirror with permissions from old region Reprotect(new_vma, vma->second.permissions); // Remove permissions from old region - Reprotect(vma, VMAPermission::None); + ReprotectRange(src_addr, size, VMAPermission::None); return RESULT_SUCCESS; } @@ -568,14 +734,14 @@ VMManager::VMAIter VMManager::SplitVMA(VMAIter vma_handle, u64 offset_in_vma) { VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) { const VMAIter next_vma = std::next(iter); if (next_vma != vma_map.end() && iter->second.CanBeMergedWith(next_vma->second)) { - iter->second.size += next_vma->second.size; + MergeAdjacentVMA(iter->second, next_vma->second); vma_map.erase(next_vma); } if (iter != vma_map.begin()) { VMAIter prev_vma = std::prev(iter); if (prev_vma->second.CanBeMergedWith(iter->second)) { - prev_vma->second.size += iter->second.size; + MergeAdjacentVMA(prev_vma->second, iter->second); vma_map.erase(iter); iter = prev_vma; } @@ -584,6 +750,38 @@ VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) { return iter; } +void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right) { + ASSERT(left.CanBeMergedWith(right)); + + // Always merge allocated memory blocks, even when they don't share the same backing block. + if (left.type == VMAType::AllocatedMemoryBlock && + (left.backing_block != right.backing_block || left.offset + left.size != right.offset)) { + // Check if we can save work. + if (left.offset == 0 && left.size == left.backing_block->size()) { + // Fast case: left is an entire backing block. + left.backing_block->insert(left.backing_block->end(), + right.backing_block->begin() + right.offset, + right.backing_block->begin() + right.offset + right.size); + } else { + // Slow case: make a new memory block for left and right. + auto new_memory = std::make_shared<std::vector<u8>>(); + new_memory->insert(new_memory->end(), left.backing_block->begin() + left.offset, + left.backing_block->begin() + left.offset + left.size); + new_memory->insert(new_memory->end(), right.backing_block->begin() + right.offset, + right.backing_block->begin() + right.offset + right.size); + left.backing_block = new_memory; + left.offset = 0; + } + + // Page table update is needed, because backing memory changed. + left.size += right.size; + UpdatePageTableForVMA(left); + } else { + // Just update the size. 
+ left.size += right.size; + } +} + void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) { switch (vma.type) { case VMAType::Free: @@ -758,6 +956,84 @@ VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, Memo std::make_tuple(initial_state, initial_permissions, initial_attributes & ~ignore_mask)); } +ResultVal<std::size_t> VMManager::SizeOfAllocatedVMAsInRange(VAddr address, + std::size_t size) const { + const VAddr end_addr = address + size; + const VAddr last_addr = end_addr - 1; + std::size_t mapped_size = 0; + + VAddr cur_addr = address; + auto iter = FindVMA(cur_addr); + ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end"); + + while (true) { + const auto& vma = iter->second; + const VAddr vma_start = vma.base; + const VAddr vma_end = vma_start + vma.size; + const VAddr vma_last = vma_end - 1; + + // Add size if relevant. + if (vma.state != MemoryState::Unmapped) { + mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr); + } + + // Break once we hit the end of the range. + if (last_addr <= vma_last) { + break; + } + + // Advance to the next block. + cur_addr = vma_end; + iter = std::next(iter); + ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end"); + } + + return MakeResult(mapped_size); +} + +ResultVal<std::size_t> VMManager::SizeOfUnmappablePhysicalMemoryInRange(VAddr address, + std::size_t size) const { + const VAddr end_addr = address + size; + const VAddr last_addr = end_addr - 1; + std::size_t mapped_size = 0; + + VAddr cur_addr = address; + auto iter = FindVMA(cur_addr); + ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end"); + + while (true) { + const auto& vma = iter->second; + const auto vma_start = vma.base; + const auto vma_end = vma_start + vma.size; + const auto vma_last = vma_end - 1; + const auto state = vma.state; + const auto attr = vma.attribute; + + // Memory within region must be free or mapped heap. + if (!((state == MemoryState::Heap && attr == MemoryAttribute::None) || + (state == MemoryState::Unmapped))) { + return ERR_INVALID_ADDRESS_STATE; + } + + // Add size if relevant. + if (state != MemoryState::Unmapped) { + mapped_size += std::min(end_addr - cur_addr, vma_end - cur_addr); + } + + // Break once we hit the end of the range. + if (last_addr <= vma_last) { + break; + } + + // Advance to the next block. + cur_addr = vma_end; + iter = std::next(iter); + ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end"); + } + + return MakeResult(mapped_size); +} + u64 VMManager::GetTotalPhysicalMemoryAvailable() const { LOG_WARNING(Kernel, "(STUBBED) called"); return 0xF8000000; diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h index 9fe6ac3f46..0aecb74991 100644 --- a/src/core/hle/kernel/vm_manager.h +++ b/src/core/hle/kernel/vm_manager.h @@ -349,7 +349,8 @@ public: * @param state MemoryState tag to attach to the VMA. */ ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block, - std::size_t offset, u64 size, MemoryState state); + std::size_t offset, u64 size, MemoryState state, + VMAPermission perm = VMAPermission::ReadWrite); /** * Maps an unmanaged host memory pointer at a given address. @@ -450,6 +451,34 @@ public: /// ResultVal<VAddr> SetHeapSize(u64 size); + /// Maps memory at a given address. + /// + /// @param addr The virtual address to map memory at. + /// @param size The amount of memory to map. 
+ /// + /// @note The destination address must lie within the Map region. + /// + /// @note This function requires that SystemResourceSize be non-zero, + /// however, this is just because if it were not then the + /// resulting page tables could be exploited on hardware by + /// a malicious program. SystemResource usage does not need + /// to be explicitly checked or updated here. + ResultCode MapPhysicalMemory(VAddr target, u64 size); + + /// Unmaps memory at a given address. + /// + /// @param addr The virtual address to unmap memory at. + /// @param size The amount of memory to unmap. + /// + /// @note The destination address must lie within the Map region. + /// + /// @note This function requires that SystemResourceSize be non-zero, + /// however, this is just because if it were not then the + /// resulting page tables could be exploited on hardware by + /// a malicious program. SystemResource usage does not need + /// to be explicitly checked or updated here. + ResultCode UnmapPhysicalMemory(VAddr target, u64 size); + /// Maps a region of memory as code memory. /// /// @param dst_address The base address of the region to create the aliasing memory region. @@ -657,6 +686,11 @@ private: */ VMAIter MergeAdjacent(VMAIter vma); + /** + * Merges two adjacent VMAs. + */ + void MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryArea& right); + /// Updates the pages corresponding to this VMA so they match the VMA's attributes. void UpdatePageTableForVMA(const VirtualMemoryArea& vma); @@ -701,6 +735,13 @@ private: MemoryAttribute attribute_mask, MemoryAttribute attribute, MemoryAttribute ignore_mask) const; + /// Gets the amount of memory currently mapped (state != Unmapped) in a range. + ResultVal<std::size_t> SizeOfAllocatedVMAsInRange(VAddr address, std::size_t size) const; + + /// Gets the amount of memory unmappable by UnmapPhysicalMemory in a range. + ResultVal<std::size_t> SizeOfUnmappablePhysicalMemoryInRange(VAddr address, + std::size_t size) const; + /** * A map covering the entirety of the managed address space, keyed by the `base` field of each * VMA. It must always be modified by splitting or merging VMAs, so that the invariant @@ -742,6 +783,11 @@ private: // end of the range. This is essentially 'base_address + current_size'. VAddr heap_end = 0; + // The current amount of memory mapped via MapPhysicalMemory. + // This is used here (and in Nintendo's kernel) only for debugging, and does not impact + // any behavior. + u64 physical_memory_mapped = 0; + Core::System& system; }; } // namespace Kernel diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index cd32c65d3b..7c18c27b36 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,4 +1,5 @@ add_library(video_core STATIC + buffer_cache.h dma_pusher.cpp dma_pusher.h debug_utils/debug_utils.cpp @@ -43,8 +44,6 @@ add_library(video_core STATIC renderer_opengl/gl_device.h renderer_opengl/gl_framebuffer_cache.cpp renderer_opengl/gl_framebuffer_cache.h - renderer_opengl/gl_global_cache.cpp - renderer_opengl/gl_global_cache.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_resource_manager.cpp diff --git a/src/video_core/buffer_cache.h b/src/video_core/buffer_cache.h new file mode 100644 index 0000000000..6f868b8b4e --- /dev/null +++ b/src/video_core/buffer_cache.h @@ -0,0 +1,299 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <array> +#include <memory> +#include <mutex> +#include <unordered_map> +#include <unordered_set> +#include <utility> +#include <vector> + +#include "common/alignment.h" +#include "common/common_types.h" +#include "core/core.h" +#include "video_core/memory_manager.h" +#include "video_core/rasterizer_cache.h" + +namespace VideoCore { +class RasterizerInterface; +} + +namespace VideoCommon { + +template <typename BufferStorageType> +class CachedBuffer final : public RasterizerCacheObject { +public: + explicit CachedBuffer(VAddr cpu_addr, u8* host_ptr) + : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {} + ~CachedBuffer() override = default; + + VAddr GetCpuAddr() const override { + return cpu_addr; + } + + std::size_t GetSizeInBytes() const override { + return size; + } + + u8* GetWritableHostPtr() const { + return host_ptr; + } + + std::size_t GetSize() const { + return size; + } + + std::size_t GetCapacity() const { + return capacity; + } + + bool IsInternalized() const { + return is_internal; + } + + const BufferStorageType& GetBuffer() const { + return buffer; + } + + void SetSize(std::size_t new_size) { + size = new_size; + } + + void SetInternalState(bool is_internal_) { + is_internal = is_internal_; + } + + BufferStorageType ExchangeBuffer(BufferStorageType buffer_, std::size_t new_capacity) { + capacity = new_capacity; + std::swap(buffer, buffer_); + return buffer_; + } + +private: + u8* host_ptr{}; + VAddr cpu_addr{}; + std::size_t size{}; + std::size_t capacity{}; + bool is_internal{}; + BufferStorageType buffer; +}; + +template <typename BufferStorageType, typename BufferType, typename StreamBuffer> +class BufferCache : public RasterizerCache<std::shared_ptr<CachedBuffer<BufferStorageType>>> { +public: + using Buffer = std::shared_ptr<CachedBuffer<BufferStorageType>>; + using BufferInfo = std::pair<const BufferType*, u64>; + + explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, + std::unique_ptr<StreamBuffer> stream_buffer) + : RasterizerCache<Buffer>{rasterizer}, system{system}, + stream_buffer{std::move(stream_buffer)}, stream_buffer_handle{ + this->stream_buffer->GetHandle()} {} + ~BufferCache() = default; + + void Unregister(const Buffer& entry) override { + std::lock_guard lock{RasterizerCache<Buffer>::mutex}; + if (entry->IsInternalized()) { + internalized_entries.erase(entry->GetCacheAddr()); + } + ReserveBuffer(entry); + RasterizerCache<Buffer>::Unregister(entry); + } + + void TickFrame() { + marked_for_destruction_index = + (marked_for_destruction_index + 1) % marked_for_destruction_ring_buffer.size(); + MarkedForDestruction().clear(); + } + + BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, + bool internalize = false, bool is_written = false) { + std::lock_guard lock{RasterizerCache<Buffer>::mutex}; + + auto& memory_manager = system.GPU().MemoryManager(); + const auto host_ptr = memory_manager.GetPointer(gpu_addr); + if (!host_ptr) { + return {GetEmptyBuffer(size), 0}; + } + const auto cache_addr = ToCacheAddr(host_ptr); + + // Cache management is a big overhead, so only cache entries with a given size. + // TODO: Figure out which size is the best for given games. 
+ constexpr std::size_t max_stream_size = 0x800; + if (!internalize && size < max_stream_size && + internalized_entries.find(cache_addr) == internalized_entries.end()) { + return StreamBufferUpload(host_ptr, size, alignment); + } + + auto entry = RasterizerCache<Buffer>::TryGet(cache_addr); + if (!entry) { + return FixedBufferUpload(gpu_addr, host_ptr, size, internalize, is_written); + } + + if (entry->GetSize() < size) { + IncreaseBufferSize(entry, size); + } + if (is_written) { + entry->MarkAsModified(true, *this); + } + return {ToHandle(entry->GetBuffer()), 0}; + } + + /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. + BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, + std::size_t alignment = 4) { + std::lock_guard lock{RasterizerCache<Buffer>::mutex}; + return StreamBufferUpload(raw_pointer, size, alignment); + } + + void Map(std::size_t max_size) { + std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); + buffer_offset = buffer_offset_base; + } + + /// Finishes the upload stream, returns true on bindings invalidation. + bool Unmap() { + stream_buffer->Unmap(buffer_offset - buffer_offset_base); + return std::exchange(invalidated, false); + } + + virtual const BufferType* GetEmptyBuffer(std::size_t size) = 0; + +protected: + void FlushObjectInner(const Buffer& entry) override { + DownloadBufferData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr()); + } + + virtual BufferStorageType CreateBuffer(std::size_t size) = 0; + + virtual const BufferType* ToHandle(const BufferStorageType& storage) = 0; + + virtual void UploadBufferData(const BufferStorageType& buffer, std::size_t offset, + std::size_t size, const u8* data) = 0; + + virtual void DownloadBufferData(const BufferStorageType& buffer, std::size_t offset, + std::size_t size, u8* data) = 0; + + virtual void CopyBufferData(const BufferStorageType& src, const BufferStorageType& dst, + std::size_t src_offset, std::size_t dst_offset, + std::size_t size) = 0; + +private: + BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, + std::size_t alignment) { + AlignBuffer(alignment); + const std::size_t uploaded_offset = buffer_offset; + std::memcpy(buffer_ptr, raw_pointer, size); + + buffer_ptr += size; + buffer_offset += size; + return {&stream_buffer_handle, uploaded_offset}; + } + + BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size, + bool internalize, bool is_written) { + auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); + const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr); + + auto entry = GetUncachedBuffer(*cpu_addr, host_ptr); + entry->SetSize(size); + entry->SetInternalState(internalize); + RasterizerCache<Buffer>::Register(entry); + + if (internalize) { + internalized_entries.emplace(ToCacheAddr(host_ptr)); + } + if (is_written) { + entry->MarkAsModified(true, *this); + } + + if (entry->GetCapacity() < size) { + MarkedForDestruction().push_back(entry->ExchangeBuffer(CreateBuffer(size), size)); + } + + UploadBufferData(entry->GetBuffer(), 0, size, host_ptr); + return {ToHandle(entry->GetBuffer()), 0}; + } + + void IncreaseBufferSize(Buffer& entry, std::size_t new_size) { + const std::size_t old_size = entry->GetSize(); + if (entry->GetCapacity() < new_size) { + const auto& old_buffer = entry->GetBuffer(); + auto new_buffer = CreateBuffer(new_size); + + // Copy bits from the old buffer to the new buffer. 
+ CopyBufferData(old_buffer, new_buffer, 0, 0, old_size); + MarkedForDestruction().push_back( + entry->ExchangeBuffer(std::move(new_buffer), new_size)); + + // This buffer could have been used + invalidated = true; + } + // Upload the new bits. + const std::size_t size_diff = new_size - old_size; + UploadBufferData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size); + + // Update entry's size in the object and in the cache. + Unregister(entry); + + entry->SetSize(new_size); + RasterizerCache<Buffer>::Register(entry); + } + + Buffer GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) { + if (auto entry = TryGetReservedBuffer(host_ptr)) { + return entry; + } + return std::make_shared<CachedBuffer<BufferStorageType>>(cpu_addr, host_ptr); + } + + Buffer TryGetReservedBuffer(u8* host_ptr) { + const auto it = buffer_reserve.find(ToCacheAddr(host_ptr)); + if (it == buffer_reserve.end()) { + return {}; + } + auto& reserve = it->second; + auto entry = reserve.back(); + reserve.pop_back(); + return entry; + } + + void ReserveBuffer(Buffer entry) { + buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry)); + } + + void AlignBuffer(std::size_t alignment) { + // Align the offset, not the mapped pointer + const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment); + buffer_ptr += offset_aligned - buffer_offset; + buffer_offset = offset_aligned; + } + + std::vector<BufferStorageType>& MarkedForDestruction() { + return marked_for_destruction_ring_buffer[marked_for_destruction_index]; + } + + Core::System& system; + + std::unique_ptr<StreamBuffer> stream_buffer; + BufferType stream_buffer_handle{}; + + bool invalidated = false; + + u8* buffer_ptr = nullptr; + u64 buffer_offset = 0; + u64 buffer_offset_base = 0; + + std::size_t marked_for_destruction_index = 0; + std::array<std::vector<BufferStorageType>, 4> marked_for_destruction_ring_buffer; + + std::unordered_set<CacheAddr> internalized_entries; + std::unordered_map<CacheAddr, std::vector<Buffer>> buffer_reserve; +}; + +} // namespace VideoCommon diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 13e3149444..8d15c8a482 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -67,6 +67,7 @@ public: static constexpr std::size_t MaxShaderStage = 5; // Maximum number of const buffers per shader stage. static constexpr std::size_t MaxConstBuffers = 18; + static constexpr std::size_t MaxConstBufferSize = 0x10000; enum class QueryMode : u32 { Write = 0, diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index c3055602b9..79d469b88b 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -78,7 +78,7 @@ union Attribute { constexpr explicit Attribute(u64 value) : value(value) {} enum class Index : u64 { - PointSize = 6, + LayerViewportPointSize = 6, Position = 7, Attribute_0 = 8, Attribute_31 = 39, @@ -1278,6 +1278,7 @@ union Instruction { union { BitField<49, 1, u64> nodep_flag; BitField<53, 4, u64> texture_info; + BitField<59, 1, u64> fp32_flag; TextureType GetTextureType() const { // The TLDS instruction has a weird encoding for the texture type. 
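(Aside on the INST patterns in this table; illustration only.) The hunk above adds the fp32_flag bit to TLDS, and the next hunk widens its opcode pattern from "1101101---------" to "1101-01---------", where '-' is a don't-care bit; if the patterns are top-aligned, the relaxed bit lines up with the new fp32_flag at bit 59. A minimal sketch of how such a pattern can be reduced to a mask/expected pair, assuming for the sake of the example that the 16-character string describes the topmost 16 bits of the 64-bit opcode (OpcodeMatcher is a hypothetical name, not the actual decoder in shader_bytecode.h):

#include <cstdint>
#include <string_view>

struct OpcodeMatcher {
    std::uint16_t mask{};
    std::uint16_t expected{};

    constexpr explicit OpcodeMatcher(std::string_view pattern) {
        for (const char bit : pattern) {
            mask <<= 1;
            expected <<= 1;
            if (bit == '0' || bit == '1') {
                mask |= 1;
                expected |= (bit == '1') ? 1 : 0;
            }
        }
    }

    // An opcode matches when every non-wildcard bit agrees.
    constexpr bool Matches(std::uint16_t high_bits) const {
        return (high_bits & mask) == expected;
    }
};

// Relaxing the fifth pattern character lets both TLDS encodings match one entry.
static_assert(OpcodeMatcher{"1101-01---------"}.Matches(0b1101'1010'0000'0000));
static_assert(OpcodeMatcher{"1101-01---------"}.Matches(0b1101'0010'0000'0000));
static_assert(!OpcodeMatcher{"1101101---------"}.Matches(0b1101'0010'0000'0000));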
@@ -1776,7 +1777,7 @@ private: INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"), INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), INST("11011100--11----", Id::TLD, Type::Texture, "TLD"), - INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), + INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"), INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 52706505b0..1b4975498c 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -31,7 +31,7 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { auto& rasterizer{renderer.Rasterizer()}; - memory_manager = std::make_unique<Tegra::MemoryManager>(rasterizer); + memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index c766ed692b..9f59a2dc1f 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp @@ -4,14 +4,18 @@ #include "common/assert.h" #include "common/logging/log.h" +#include "common/microprofile.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/macro_interpreter.h" +MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); + namespace Tegra { MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { + MICROPROFILE_SCOPE(MacroInterp); Reset(); registers[1] = parameters[0]; this->parameters = std::move(parameters); diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 3224531162..bffae940c0 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -5,13 +5,17 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" +#include "core/core.h" +#include "core/hle/kernel/process.h" +#include "core/hle/kernel/vm_manager.h" #include "core/memory.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" namespace Tegra { -MemoryManager::MemoryManager(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} { +MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer) + : rasterizer{rasterizer}, system{system} { std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); std::fill(page_table.attributes.begin(), page_table.attributes.end(), Common::PageType::Unmapped); @@ -49,6 +53,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) { const GPUVAddr gpu_addr{FindFreeRegion(address_space_base, aligned_size)}; MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); + ASSERT(system.CurrentProcess() + ->VMManager() + .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped, + Kernel::MemoryAttribute::DeviceMapped) + .IsSuccess()); return gpu_addr; } @@ -59,7 +68,11 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) const u64 
aligned_size{Common::AlignUp(size, page_size)}; MapBackingMemory(gpu_addr, Memory::GetPointer(cpu_addr), aligned_size, cpu_addr); - + ASSERT(system.CurrentProcess() + ->VMManager() + .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped, + Kernel::MemoryAttribute::DeviceMapped) + .IsSuccess()); return gpu_addr; } @@ -68,9 +81,16 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { const u64 aligned_size{Common::AlignUp(size, page_size)}; const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; + const auto cpu_addr = GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr); rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size); UnmapRange(gpu_addr, aligned_size); + ASSERT(system.CurrentProcess() + ->VMManager() + .SetMemoryAttribute(cpu_addr.value(), size, Kernel::MemoryAttribute::DeviceMapped, + Kernel::MemoryAttribute::None) + .IsSuccess()); return gpu_addr; } diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 43a84bd528..aea0100870 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -14,6 +14,10 @@ namespace VideoCore { class RasterizerInterface; } +namespace Core { +class System; +} + namespace Tegra { /** @@ -47,7 +51,7 @@ struct VirtualMemoryArea { class MemoryManager final { public: - explicit MemoryManager(VideoCore::RasterizerInterface& rasterizer); + explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer); ~MemoryManager(); GPUVAddr AllocateSpace(u64 size, u64 align); @@ -173,6 +177,8 @@ private: Common::PageTable page_table{page_bits}; VMAMap vma_map; VideoCore::RasterizerInterface& rasterizer; + + Core::System& system; }; } // namespace Tegra diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 5ee4f8e8ec..2b7367568e 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -47,6 +47,9 @@ public: /// and invalidated virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; + /// Notify rasterizer that a frame is about to finish + virtual void TickFrame() = 0; + /// Attempt to use a faster method to perform a surface copy virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 2b9bd142e3..2a9b523f5b 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -2,103 +2,57 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include <cstring> #include <memory> -#include "common/alignment.h" -#include "core/core.h" -#include "video_core/memory_manager.h" +#include <glad/glad.h> + +#include "common/assert.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { -CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, - std::size_t alignment, u8* host_ptr) - : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset}, - alignment{alignment} {} - -OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) - : RasterizerCache{rasterizer}, stream_buffer(size, true) {} - -GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment, - bool cache) { - std::lock_guard lock{mutex}; - auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); - - // Cache management is a big overhead, so only cache entries with a given size. - // TODO: Figure out which size is the best for given games. - cache &= size >= 2048; - - const auto& host_ptr{memory_manager.GetPointer(gpu_addr)}; - if (cache) { - auto entry = TryGet(host_ptr); - if (entry) { - if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { - return entry->GetOffset(); - } - Unregister(entry); - } - } +OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, + std::size_t stream_size) + : VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer>{ + rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {} - AlignBuffer(alignment); - const GLintptr uploaded_offset = buffer_offset; +OGLBufferCache::~OGLBufferCache() = default; - if (!host_ptr) { - return uploaded_offset; - } - - std::memcpy(buffer_ptr, host_ptr, size); - buffer_ptr += size; - buffer_offset += size; - - if (cache) { - auto entry = std::make_shared<CachedBufferEntry>( - *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr); - Register(entry); - } - - return uploaded_offset; +OGLBuffer OGLBufferCache::CreateBuffer(std::size_t size) { + OGLBuffer buffer; + buffer.Create(); + glNamedBufferData(buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); + return buffer; } -GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size, - std::size_t alignment) { - std::lock_guard lock{mutex}; - AlignBuffer(alignment); - std::memcpy(buffer_ptr, raw_pointer, size); - const GLintptr uploaded_offset = buffer_offset; - - buffer_ptr += size; - buffer_offset += size; - return uploaded_offset; +const GLuint* OGLBufferCache::ToHandle(const OGLBuffer& buffer) { + return &buffer.handle; } -bool OGLBufferCache::Map(std::size_t max_size) { - bool invalidate; - std::tie(buffer_ptr, buffer_offset_base, invalidate) = - stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4); - buffer_offset = buffer_offset_base; - - if (invalidate) { - InvalidateAll(); - } - return invalidate; +const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) { + static const GLuint null_buffer = 0; + return &null_buffer; } -void OGLBufferCache::Unmap() { - stream_buffer.Unmap(buffer_offset - buffer_offset_base); +void OGLBufferCache::UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, + const u8* data) { + glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), + static_cast<GLsizeiptr>(size), data); } -GLuint 
OGLBufferCache::GetHandle() const { - return stream_buffer.GetHandle(); +void OGLBufferCache::DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, + std::size_t size, u8* data) { + glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), + static_cast<GLsizeiptr>(size), data); } -void OGLBufferCache::AlignBuffer(std::size_t alignment) { - // Align the offset, not the mapped pointer - const GLintptr offset_aligned = - static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment)); - buffer_ptr += offset_aligned - buffer_offset; - buffer_offset = offset_aligned; +void OGLBufferCache::CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, + std::size_t src_offset, std::size_t dst_offset, + std::size_t size) { + glCopyNamedBufferSubData(src.handle, dst.handle, static_cast<GLintptr>(src_offset), + static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index f2347581b5..8c8ac4038e 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -4,80 +4,44 @@ #pragma once -#include <cstddef> #include <memory> -#include <tuple> #include "common/common_types.h" +#include "video_core/buffer_cache.h" #include "video_core/rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" +namespace Core { +class System; +} + namespace OpenGL { +class OGLStreamBuffer; class RasterizerOpenGL; -class CachedBufferEntry final : public RasterizerCacheObject { -public: - explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, - std::size_t alignment, u8* host_ptr); - - VAddr GetCpuAddr() const override { - return cpu_addr; - } - - std::size_t GetSizeInBytes() const override { - return size; - } - - std::size_t GetSize() const { - return size; - } - - GLintptr GetOffset() const { - return offset; - } - - std::size_t GetAlignment() const { - return alignment; - } - -private: - VAddr cpu_addr{}; - std::size_t size{}; - GLintptr offset{}; - std::size_t alignment{}; -}; - -class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { +class OGLBufferCache final : public VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer> { public: - explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size); - - /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been - /// allocated. - GLintptr UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, - bool cache = true); + explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, + std::size_t stream_size); + ~OGLBufferCache(); - /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. - GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4); - - bool Map(std::size_t max_size); - void Unmap(); - - GLuint GetHandle() const; + const GLuint* GetEmptyBuffer(std::size_t) override; protected: - void AlignBuffer(std::size_t alignment); + OGLBuffer CreateBuffer(std::size_t size) override; + + const GLuint* ToHandle(const OGLBuffer& buffer) override; - // We do not have to flush this cache as things in it are never modified by us. 
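(Note on the const GLuint* signatures above; reduced sketch only.) ToHandle() and GetEmptyBuffer() return a pointer to the GL handle rather than the handle itself: bindings are recorded as const GLuint* and only dereferenced at the moment the multi-bind call is issued, so a buffer object that is recreated between recording and binding still resolves to its current name. The sketch below uses hypothetical names (PendingUniformBindings, Push, Bind) and assumes ARB_multi_bind / GL 4.4 for glBindBuffersRange; the real counterpart is BindBuffersRangePushBuffer in renderer_opengl/utils.cpp further down in this diff:

#include <vector>

#include <glad/glad.h>

class PendingUniformBindings {
public:
    void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) {
        pointers.push_back(buffer);
        offsets.push_back(offset);
        sizes.push_back(size);
    }

    void Bind(GLuint first) {
        if (pointers.empty()) {
            return;
        }
        // Dereference as late as possible, after all uploads have finished.
        handles.clear();
        for (const GLuint* pointer : pointers) {
            handles.push_back(*pointer);
        }
        glBindBuffersRange(GL_UNIFORM_BUFFER, first, static_cast<GLsizei>(handles.size()),
                           handles.data(), offsets.data(), sizes.data());
    }

private:
    std::vector<const GLuint*> pointers;
    std::vector<GLuint> handles;
    std::vector<GLintptr> offsets;
    std::vector<GLsizeiptr> sizes;
};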
- void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {} + void UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, + const u8* data) override; -private: - OGLStreamBuffer stream_buffer; + void DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, + u8* data) override; - u8* buffer_ptr = nullptr; - GLintptr buffer_offset = 0; - GLintptr buffer_offset_base = 0; + void CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, std::size_t src_offset, + std::size_t dst_offset, std::size_t size) override; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index a48e14d2ee..85424a4c9a 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -24,8 +24,10 @@ T GetInteger(GLenum pname) { Device::Device() { uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); + shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); + has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = TestComponentIndexingBug(); } @@ -34,6 +36,7 @@ Device::Device(std::nullptr_t) { uniform_buffer_alignment = 0; max_vertex_attributes = 16; max_varyings = 15; + has_vertex_viewport_layer = true; has_variable_aoffi = true; has_component_indexing_bug = false; } diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 8c8c937600..dc883722df 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -18,6 +18,10 @@ public: return uniform_buffer_alignment; } + std::size_t GetShaderStorageBufferAlignment() const { + return shader_storage_alignment; + } + u32 GetMaxVertexAttributes() const { return max_vertex_attributes; } @@ -26,6 +30,10 @@ public: return max_varyings; } + bool HasVertexViewportLayer() const { + return has_vertex_viewport_layer; + } + bool HasVariableAoffi() const { return has_variable_aoffi; } @@ -39,8 +47,10 @@ private: static bool TestComponentIndexingBug(); std::size_t uniform_buffer_alignment{}; + std::size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; u32 max_varyings{}; + bool has_vertex_viewport_layer{}; bool has_variable_aoffi{}; bool has_component_indexing_bug{}; }; diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp deleted file mode 100644 index d5e385151f..0000000000 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
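(Back-reference to the gl_device.cpp hunk above; illustration only.) The newly queried GetShaderStorageBufferAlignment() exists because offsets passed to glBindBufferRange or glBindBuffersRange on GL_SHADER_STORAGE_BUFFER must be multiples of GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT, and SetupGlobalRegions later feeds exactly this value into the buffer cache. A hypothetical helper (BindSsboRange is not a function in this patch) that only makes the requirement explicit:

#include <cassert>

#include <glad/glad.h>

// A misaligned offset would raise GL_INVALID_VALUE, so the upload path aligns
// the write position before the data is copied into the buffer.
void BindSsboRange(GLuint binding, GLuint buffer, GLintptr offset, GLsizeiptr size,
                   GLint ssbo_alignment) {
    assert(offset % ssbo_alignment == 0);
    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer, offset, size);
}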
- -#include <glad/glad.h> - -#include "common/logging/log.h" -#include "core/core.h" -#include "video_core/memory_manager.h" -#include "video_core/renderer_opengl/gl_global_cache.h" -#include "video_core/renderer_opengl/gl_rasterizer.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_opengl/utils.h" - -namespace OpenGL { - -CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size) - : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size}, - max_size{max_size} { - buffer.Create(); - LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory"); -} - -CachedGlobalRegion::~CachedGlobalRegion() = default; - -void CachedGlobalRegion::Reload(u32 size_) { - size = size_; - if (size > max_size) { - size = max_size; - LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_, - max_size); - } - glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW); -} - -void CachedGlobalRegion::Flush() { - LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr); - glGetNamedBufferSubData(buffer.handle, 0, static_cast<GLsizeiptr>(size), host_ptr); -} - -GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const { - const auto search{reserve.find(addr)}; - if (search == reserve.end()) { - return {}; - } - return search->second; -} - -GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, - u32 size) { - GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)}; - if (!region) { - // No reserved surface available, create a new one and reserve it - auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; - const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)}; - ASSERT(cpu_addr); - - region = std::make_shared<CachedGlobalRegion>(*cpu_addr, host_ptr, size, max_ssbo_size); - ReserveGlobalRegion(region); - } - region->Reload(size); - return region; -} - -void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { - reserve.insert_or_assign(region->GetCacheAddr(), std::move(region)); -} - -GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) - : RasterizerCache{rasterizer} { - GLint max_ssbo_size_; - glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_); - max_ssbo_size = static_cast<u32>(max_ssbo_size_); -} - -GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( - const GLShader::GlobalMemoryEntry& global_region, - Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) { - std::lock_guard lock{mutex}; - - auto& gpu{Core::System::GetInstance().GPU()}; - auto& memory_manager{gpu.MemoryManager()}; - const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; - const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address + - global_region.GetCbufOffset()}; - const auto actual_addr{memory_manager.Read<u64>(addr)}; - const auto size{memory_manager.Read<u32>(addr + 8)}; - - // Look up global region in the cache based on address - const auto& host_ptr{memory_manager.GetPointer(actual_addr)}; - GlobalRegion region{TryGet(host_ptr)}; - - if (!region) { - // No global region found - create a new one - region = GetUncachedGlobalRegion(actual_addr, host_ptr, size); - Register(region); - } - - return region; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h 
deleted file mode 100644 index 2d467a2401..0000000000 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <memory> -#include <unordered_map> - -#include <glad/glad.h> - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/rasterizer_cache.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" - -namespace OpenGL { - -namespace GLShader { -class GlobalMemoryEntry; -} - -class RasterizerOpenGL; -class CachedGlobalRegion; -using GlobalRegion = std::shared_ptr<CachedGlobalRegion>; - -class CachedGlobalRegion final : public RasterizerCacheObject { -public: - explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size); - ~CachedGlobalRegion(); - - VAddr GetCpuAddr() const override { - return cpu_addr; - } - - std::size_t GetSizeInBytes() const override { - return size; - } - - /// Gets the GL program handle for the buffer - GLuint GetBufferHandle() const { - return buffer.handle; - } - - /// Reloads the global region from guest memory - void Reload(u32 size_); - - void Flush(); - -private: - VAddr cpu_addr{}; - u8* host_ptr{}; - u32 size{}; - u32 max_size{}; - - OGLBuffer buffer; -}; - -class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> { -public: - explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer); - - /// Gets the current specified shader stage program - GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor, - Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); - -protected: - void FlushObjectInner(const GlobalRegion& object) override { - object->Flush(); - } - -private: - GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const; - GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size); - void ReserveGlobalRegion(GlobalRegion region); - - std::unordered_map<CacheAddr, GlobalRegion> reserve; - u32 max_ssbo_size{}; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f45a3c5efc..0bb5c068c3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -20,6 +20,7 @@ #include "core/hle/kernel/process.h" #include "core/settings.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_gen.h" @@ -80,11 +81,25 @@ struct DrawParameters { } }; +static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, + const GLShader::ConstBufferEntry& entry) { + if (!entry.IsIndirect()) { + return entry.GetSize(); + } + + if (buffer.size > Maxwell::MaxConstBufferSize) { + LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size, + Maxwell::MaxConstBufferSize); + return Maxwell::MaxConstBufferSize; + } + + return buffer.size; +} + RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info) : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, - global_cache{*this}, system{system}, screen_info{info}, - buffer_cache(*this, STREAM_BUFFER_SIZE) { + 
system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} { OpenGLState::ApplyDefaultState(); shader_program_manager = std::make_unique<GLShader::ProgramManager>(); @@ -129,8 +144,6 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { state.draw.vertex_array = vao; state.ApplyVertexArrayState(); - glVertexArrayElementBuffer(vao, buffer_cache.GetHandle()); - // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. // Enables the first 16 vertex attributes always, as we don't know which ones are actually // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16 @@ -197,11 +210,11 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { ASSERT(end > start); const u64 size = end - start + 1; - const GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size); + const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); // Bind the vertex array to the buffer at the current offset. - glVertexArrayVertexBuffer(vao, index, buffer_cache.GetHandle(), vertex_buffer_offset, - vertex_array.stride); + vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset, + vertex_array.stride); if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { // Enable vertex buffer instancing with the specified divisor. @@ -215,7 +228,19 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { gpu.dirty_flags.vertex_array.reset(); } -DrawParameters RasterizerOpenGL::SetupDraw() { +GLintptr RasterizerOpenGL::SetupIndexBuffer() { + if (accelerate_draw != AccelDraw::Indexed) { + return 0; + } + MICROPROFILE_SCOPE(OpenGL_Index); + const auto& regs = system.GPU().Maxwell3D().regs; + const std::size_t size = CalculateIndexBufferSize(); + const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); + vertex_array_pushbuffer.SetIndexBuffer(buffer); + return offset; +} + +DrawParameters RasterizerOpenGL::SetupDraw(GLintptr index_buffer_offset) { const auto& gpu = system.GPU().Maxwell3D(); const auto& regs = gpu.regs; const bool is_indexed = accelerate_draw == AccelDraw::Indexed; @@ -227,11 +252,9 @@ DrawParameters RasterizerOpenGL::SetupDraw() { params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); if (is_indexed) { - MICROPROFILE_SCOPE(OpenGL_Index); params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); params.count = regs.index_array.count; - params.index_buffer_offset = - buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize()); + params.index_buffer_offset = index_buffer_offset; params.base_vertex = static_cast<GLint>(regs.vb_element_base); } else { params.count = regs.vertex_buffer.count; @@ -247,10 +270,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { BaseBindings base_bindings; std::array<bool, Maxwell::NumClipDistances> clip_distances{}; - // Prepare packed bindings - bind_ubo_pushbuffer.Setup(base_bindings.cbuf); - bind_ssbo_pushbuffer.Setup(base_bindings.gmem); - for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto& shader_config = gpu.regs.shader_config[index]; const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; @@ -271,12 +290,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { GLShader::MaxwellUniformData ubo{}; ubo.SetFromRegs(gpu, stage); - const GLintptr offset = + const auto [buffer, offset] = buffer_cache.UploadHostMemory(&ubo, 
sizeof(ubo), device.GetUniformBufferAlignment()); // Bind the emulation info buffer - bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, - static_cast<GLsizeiptr>(sizeof(ubo))); + bind_ubo_pushbuffer.Push(buffer, offset, static_cast<GLsizeiptr>(sizeof(ubo))); Shader shader{shader_cache.GetStageProgram(program)}; @@ -321,9 +339,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { base_bindings = next_bindings; } - bind_ubo_pushbuffer.Bind(); - bind_ssbo_pushbuffer.Bind(); - SyncClipEnabled(clip_distances); gpu.dirty_flags.shaders = false; @@ -634,26 +649,46 @@ void RasterizerOpenGL::DrawArrays() { Maxwell::MaxShaderStage; // Add space for at least 18 constant buffers - buffer_size += - Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); + buffer_size += Maxwell::MaxConstBuffers * + (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); - const bool invalidate = buffer_cache.Map(buffer_size); - if (invalidate) { - // As all cached buffers are invalidated, we need to recheck their state. - gpu.dirty_flags.vertex_array.set(); - } + // Prepare the vertex array. + buffer_cache.Map(buffer_size); + // Prepare vertex array format. const GLuint vao = SetupVertexFormat(); + vertex_array_pushbuffer.Setup(vao); + + // Upload vertex and index data. SetupVertexBuffer(vao); + const GLintptr index_buffer_offset = SetupIndexBuffer(); - DrawParameters params = SetupDraw(); + // Setup draw parameters. It will automatically choose what glDraw* method to use. + const DrawParameters params = SetupDraw(index_buffer_offset); + + // Prepare packed bindings. + bind_ubo_pushbuffer.Setup(0); + bind_ssbo_pushbuffer.Setup(0); + + // Setup shaders and their used resources. texture_cache.GuardSamplers(true); SetupShaders(params.primitive_mode); texture_cache.GuardSamplers(false); ConfigureFramebuffers(state); - buffer_cache.Unmap(); + // Signal the buffer cache that we are not going to upload more things. + const bool invalidate = buffer_cache.Unmap(); + + // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL. + vertex_array_pushbuffer.Bind(); + bind_ubo_pushbuffer.Bind(); + bind_ssbo_pushbuffer.Bind(); + + if (invalidate) { + // As all cached buffers are invalidated, we need to recheck their state. 
+ gpu.dirty_flags.vertex_array.set(); + } shader_program_manager->ApplyTo(state); state.Apply(); @@ -675,7 +710,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { return; } texture_cache.FlushRegion(addr, size); - global_cache.FlushRegion(addr, size); + buffer_cache.FlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { @@ -685,7 +720,6 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { } texture_cache.InvalidateRegion(addr, size); shader_cache.InvalidateRegion(addr, size); - global_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); } @@ -696,6 +730,10 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { InvalidateRegion(addr, size); } +void RasterizerOpenGL::TickFrame() { + buffer_cache.TickFrame(); +} + bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) { @@ -739,11 +777,9 @@ void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::Sh MICROPROFILE_SCOPE(OpenGL_UBO); const auto stage_index = static_cast<std::size_t>(stage); const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; - const auto& entries = shader->GetShaderEntries().const_buffers; // Upload only the enabled buffers from the 16 constbuffers of each shader stage - for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { - const auto& entry = entries[bindpoint]; + for (const auto& entry : shader->GetShaderEntries().const_buffers) { SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); } } @@ -752,46 +788,34 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b const GLShader::ConstBufferEntry& entry) { if (!buffer.enabled) { // Set values to zero to unbind buffers - bind_ubo_pushbuffer.Push(0, 0, 0); + bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); return; } - std::size_t size; - if (entry.IsIndirect()) { - // Buffer is accessed indirectly, so upload the entire thing - size = buffer.size; - - if (size > MaxConstbufferSize) { - LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size, - MaxConstbufferSize); - size = MaxConstbufferSize; - } - } else { - // Buffer is accessed directly, upload just what we use - size = entry.GetSize(); - } - // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 // UBO alignment requirements. 
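(A worked example of the alignment comment above; hypothetical numbers, not patch content.) Assuming GLvec4 is a 16-byte std::array<GLfloat, 4>, a constant buffer slice of 72 bytes (18 floats) is padded to the next multiple of 16 before it is uploaded and bound, which is what the AlignUp call just below computes:

const std::size_t raw_size = 72;                                     // 18 floats
const std::size_t size = Common::AlignUp(raw_size, sizeof(GLvec4));  // 80 bytes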
- size = Common::AlignUp(size, sizeof(GLvec4)); - ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big"); + const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); - const std::size_t alignment = device.GetUniformBufferAlignment(); - const GLintptr offset = buffer_cache.UploadMemory(buffer.address, size, alignment); - bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, size); + const auto alignment = device.GetUniformBufferAlignment(); + const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment); + bind_ubo_pushbuffer.Push(cbuf, offset, size); } void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader) { - const auto& entries = shader->GetShaderEntries().global_memory_entries; - for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { - const auto& entry{entries[bindpoint]}; - const auto& region{global_cache.GetGlobalRegion(entry, stage)}; - if (entry.IsWritten()) { - region->MarkAsModified(true, global_cache); - } - bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0, - static_cast<GLsizeiptr>(region->GetSizeInBytes())); + auto& gpu{system.GPU()}; + auto& memory_manager{gpu.MemoryManager()}; + const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; + const auto alignment{device.GetShaderStorageBufferAlignment()}; + + for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { + const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; + const auto actual_addr{memory_manager.Read<u64>(addr)}; + const auto size{memory_manager.Read<u32>(addr + 8)}; + + const auto [ssbo, buffer_offset] = + buffer_cache.UploadMemory(actual_addr, size, alignment, true, entry.IsWritten()); + bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d238c12577..40b571d586 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -24,7 +24,6 @@ #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_framebuffer_cache.h" -#include "video_core/renderer_opengl/gl_global_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_sampler_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h" @@ -63,6 +62,7 @@ public: void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void TickFrame() override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) override; @@ -73,11 +73,6 @@ public: void LoadDiskResources(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; - /// Maximum supported size that a constbuffer can have in bytes. 
- static constexpr std::size_t MaxConstbufferSize = 0x10000; - static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, - "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); - private: struct FramebufferConfigState { bool using_color_fb{}; @@ -191,7 +186,6 @@ private: TextureCacheOpenGL texture_cache; ShaderCacheOpenGL shader_cache; - GlobalRegionCacheOpenGL global_cache; SamplerCacheOpenGL sampler_cache; FramebufferCacheOpenGL framebuffer_cache; @@ -210,6 +204,7 @@ private: static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; + VertexArrayPushBuffer vertex_array_pushbuffer; BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; @@ -222,7 +217,9 @@ private: void SetupVertexBuffer(GLuint vao); - DrawParameters SetupDraw(); + GLintptr SetupIndexBuffer(); + + DrawParameters SetupDraw(GLintptr index_buffer_offset); void SetupShaders(GLenum primitive_mode); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 5d76ee12db..32dd9eae79 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -190,8 +190,11 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn const auto texture_buffer_usage{variant.texture_buffer_usage}; std::string source = "#version 430 core\n" - "#extension GL_ARB_separate_shader_objects : enable\n\n"; - source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); + "#extension GL_ARB_separate_shader_objects : enable\n"; + if (entries.shader_viewport_layer_array) { + source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; + } + source += fmt::format("\n#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); for (const auto& cbuf : entries.const_buffers) { source += diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index bfc975a04d..119073776a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -14,6 +14,7 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_rasterizer.h" @@ -46,7 +47,7 @@ using TextureArgument = std::pair<Type, Node>; using TextureIR = std::variant<TextureAoffi, TextureArgument>; constexpr u32 MAX_CONSTBUFFER_ELEMENTS = - static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); + static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); class ShaderWriter { public: @@ -246,6 +247,8 @@ public: usage.is_read, usage.is_written); } entries.clip_distances = ir.GetClipDistances(); + entries.shader_viewport_layer_array = + stage == ShaderStage::Vertex && (ir.UsesLayer() || ir.UsesViewportIndex()); entries.shader_length = ir.GetLength(); return entries; } @@ -282,22 +285,35 @@ private: } void DeclareVertexRedeclarations() { - bool clip_distances_declared = false; - code.AddLine("out gl_PerVertex {{"); ++code.scope; code.AddLine("vec4 gl_Position;"); - for (const auto o : ir.GetOutputAttributes()) { - if (o == Attribute::Index::PointSize) - code.AddLine("float gl_PointSize;"); - if (!clip_distances_declared && (o == 
Attribute::Index::ClipDistances0123 || - o == Attribute::Index::ClipDistances4567)) { + for (const auto attribute : ir.GetOutputAttributes()) { + if (attribute == Attribute::Index::ClipDistances0123 || + attribute == Attribute::Index::ClipDistances4567) { code.AddLine("float gl_ClipDistance[];"); - clip_distances_declared = true; + break; } } + if (stage != ShaderStage::Vertex || device.HasVertexViewportLayer()) { + if (ir.UsesLayer()) { + code.AddLine("int gl_Layer;"); + } + if (ir.UsesViewportIndex()) { + code.AddLine("int gl_ViewportIndex;"); + } + } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderStage::Vertex && + !device.HasVertexViewportLayer()) { + LOG_ERROR( + Render_OpenGL, + "GL_ARB_shader_viewport_layer_array is not available and its required by a shader"); + } + + if (ir.UsesPointSize()) { + code.AddLine("float gl_PointSize;"); + } --code.scope; code.AddLine("}};"); @@ -805,6 +821,45 @@ private: return CastOperand(VisitOperand(operation, operand_index), type); } + std::optional<std::pair<std::string, bool>> GetOutputAttribute(const AbufNode* abuf) { + switch (const auto attribute = abuf->GetIndex()) { + case Attribute::Index::Position: + return std::make_pair("gl_Position"s + GetSwizzle(abuf->GetElement()), false); + case Attribute::Index::LayerViewportPointSize: + switch (abuf->GetElement()) { + case 0: + UNIMPLEMENTED(); + return {}; + case 1: + if (stage == ShaderStage::Vertex && !device.HasVertexViewportLayer()) { + return {}; + } + return std::make_pair("gl_Layer", true); + case 2: + if (stage == ShaderStage::Vertex && !device.HasVertexViewportLayer()) { + return {}; + } + return std::make_pair("gl_ViewportIndex", true); + case 3: + UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader"); + return std::make_pair("gl_PointSize", false); + } + return {}; + case Attribute::Index::ClipDistances0123: + return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), false); + case Attribute::Index::ClipDistances4567: + return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), + false); + default: + if (IsGenericAttribute(attribute)) { + return std::make_pair( + GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), false); + } + UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); + return {}; + } + } + std::string CastOperand(const std::string& value, Type type) const { switch (type) { case Type::Bool: @@ -1001,6 +1056,8 @@ private: const Node& src = operation[1]; std::string target; + bool is_integer = false; + if (const auto gpr = std::get_if<GprNode>(&*dest)) { if (gpr->GetIndex() == Register::ZeroIndex) { // Writing to Register::ZeroIndex is a no op @@ -1009,26 +1066,12 @@ private: target = GetRegister(gpr->GetIndex()); } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); - - target = [&]() -> std::string { - switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) { - case Attribute::Index::Position: - return "gl_Position"s + GetSwizzle(abuf->GetElement()); - case Attribute::Index::PointSize: - return "gl_PointSize"; - case Attribute::Index::ClipDistances0123: - return fmt::format("gl_ClipDistance[{}]", abuf->GetElement()); - case Attribute::Index::ClipDistances4567: - return fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4); - default: - if (IsGenericAttribute(attribute)) { - return GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()); - } - 
UNIMPLEMENTED_MSG("Unhandled output attribute: {}", - static_cast<u32>(attribute)); - return "0"; - } - }(); + const auto result = GetOutputAttribute(abuf); + if (!result) { + return {}; + } + target = result->first; + is_integer = result->second; } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { @@ -1040,7 +1083,11 @@ private: UNREACHABLE_MSG("Assign called without a proper target"); } - code.AddLine("{} = {};", target, Visit(src)); + if (is_integer) { + code.AddLine("{} = ftoi({});", target, Visit(src)); + } else { + code.AddLine("{} = {};", target, Visit(src)); + } return {}; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 14d11c7fc8..02586736dc 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -78,6 +78,7 @@ struct ShaderEntries { std::vector<ImageEntry> images; std::vector<GlobalMemoryEntry> global_memory_entries; std::array<bool, Maxwell::NumClipDistances> clip_distances{}; + bool shader_viewport_layer_array{}; std::size_t shader_length{}; }; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 10688397bc..7893d1e263 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -373,6 +373,12 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn } } + bool shader_viewport_layer_array{}; + if (!LoadObjectFromPrecompiled(shader_viewport_layer_array)) { + return {}; + } + entry.entries.shader_viewport_layer_array = shader_viewport_layer_array; + u64 shader_length{}; if (!LoadObjectFromPrecompiled(shader_length)) { return {}; @@ -445,6 +451,10 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: } } + if (!SaveObjectToPrecompiled(entries.shader_viewport_layer_array)) { + return false; + } + if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { return false; } diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index d86e137ac1..0eae98afef 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -6,8 +6,11 @@ #include <glad/glad.h> #include "common/assert.h" #include "common/logging/log.h" +#include "common/microprofile.h" #include "video_core/renderer_opengl/gl_state.h" +MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128)); + namespace OpenGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -524,6 +527,7 @@ void OpenGLState::ApplySamplers() const { } void OpenGLState::Apply() const { + MICROPROFILE_SCOPE(OpenGL_State); ApplyFramebufferState(); ApplyVertexArrayState(); ApplyShaderProgram(); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 08ae1a429b..b1f6bc7c20 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -31,6 +31,8 @@ using VideoCore::Surface::SurfaceType; MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); 
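(Note on the profiling timers this change set adds: MacroInterp, OpenGL_State, and the texture buffer copy timer defined just below.) The usage pattern is the same pair of macros everywhere; a minimal sketch with a hypothetical timer name:

#include "common/microprofile.h"

// Defined once at namespace scope...
MICROPROFILE_DEFINE(Example_HotPath, "OpenGL", "Example hot path", MP_RGB(128, 192, 128));

// ...then each run of the measured path is bracketed by an RAII scope object.
void ExampleHotPath() {
    MICROPROFILE_SCOPE(Example_HotPath);
    // expensive work goes here
}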
+MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", + MP_RGB(128, 192, 128)); namespace { @@ -535,6 +537,7 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, } void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { + MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy); const auto& src_params = src_surface->GetSurfaceParams(); const auto& dst_params = dst_surface->GetSurfaceParams(); UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index b142521ecc..9ecdddb0d9 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -101,7 +101,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst RendererOpenGL::~RendererOpenGL() = default; -/// Swap buffers (render frame) void RendererOpenGL::SwapBuffers( std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { @@ -130,6 +129,8 @@ void RendererOpenGL::SwapBuffers( DrawScreen(render_window.GetFramebufferLayout()); + rasterizer->TickFrame(); + render_window.SwapBuffers(); } @@ -262,7 +263,6 @@ void RendererOpenGL::CreateRasterizer() { if (rasterizer) { return; } - // Initialize sRGB Usage OpenGLState::ClearsRGBUsed(); rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); } diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index 68c36988dd..c504a2c1aa 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -13,29 +13,67 @@ namespace OpenGL { +VertexArrayPushBuffer::VertexArrayPushBuffer() = default; + +VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; + +void VertexArrayPushBuffer::Setup(GLuint vao_) { + vao = vao_; + index_buffer = nullptr; + vertex_buffers.clear(); +} + +void VertexArrayPushBuffer::SetIndexBuffer(const GLuint* buffer) { + index_buffer = buffer; +} + +void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* buffer, + GLintptr offset, GLsizei stride) { + vertex_buffers.push_back(Entry{binding_index, buffer, offset, stride}); +} + +void VertexArrayPushBuffer::Bind() { + if (index_buffer) { + glVertexArrayElementBuffer(vao, *index_buffer); + } + + // TODO(Rodrigo): Find a way to ARB_multi_bind this + for (const auto& entry : vertex_buffers) { + glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset, + entry.stride); + } +} + BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; void BindBuffersRangePushBuffer::Setup(GLuint first_) { first = first_; - buffers.clear(); + buffer_pointers.clear(); offsets.clear(); sizes.clear(); } -void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) { - buffers.push_back(buffer); +void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) { + buffer_pointers.push_back(buffer); offsets.push_back(offset); sizes.push_back(size); } -void BindBuffersRangePushBuffer::Bind() const { - const std::size_t count{buffers.size()}; +void BindBuffersRangePushBuffer::Bind() { + // Ensure sizes are valid. 
+    const std::size_t count{buffer_pointers.size()};
     DEBUG_ASSERT(count == offsets.size() && count == sizes.size());
     if (count == 0) {
         return;
     }
+
+    // Dereference buffers.
+    buffers.resize(count);
+    std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(),
+                   [](const GLuint* pointer) { return *pointer; });
+
     glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(),
                        sizes.data());
 }
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index 4a752f3b49..6c2b455466 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -11,20 +11,49 @@ namespace OpenGL {
 
-class BindBuffersRangePushBuffer {
+class VertexArrayPushBuffer final {
 public:
-    BindBuffersRangePushBuffer(GLenum target);
+    explicit VertexArrayPushBuffer();
+    ~VertexArrayPushBuffer();
+
+    void Setup(GLuint vao_);
+
+    void SetIndexBuffer(const GLuint* buffer);
+
+    void SetVertexBuffer(GLuint binding_index, const GLuint* buffer, GLintptr offset,
+                         GLsizei stride);
+
+    void Bind();
+
+private:
+    struct Entry {
+        GLuint binding_index{};
+        const GLuint* buffer{};
+        GLintptr offset{};
+        GLsizei stride{};
+    };
+
+    GLuint vao{};
+    const GLuint* index_buffer{};
+    std::vector<Entry> vertex_buffers;
+};
+
+class BindBuffersRangePushBuffer final {
+public:
+    explicit BindBuffersRangePushBuffer(GLenum target);
     ~BindBuffersRangePushBuffer();
 
     void Setup(GLuint first_);
 
-    void Push(GLuint buffer, GLintptr offset, GLsizeiptr size);
+    void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size);
 
-    void Bind() const;
+    void Bind();
 
 private:
-    GLenum target;
-    GLuint first;
+    GLenum target{};
+    GLuint first{};
+    std::vector<const GLuint*> buffer_pointers;
+    std::vector<GLuint> buffers;
     std::vector<GLintptr> offsets;
     std::vector<GLsizeiptr> sizes;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 1bb04607bc..9b2d8e987d 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -430,20 +430,17 @@ private:
         instance_index = DeclareBuiltIn(spv::BuiltIn::InstanceIndex, spv::StorageClass::Input,
                                         t_in_uint, "instance_index");
 
-        bool is_point_size_declared = false;
         bool is_clip_distances_declared = false;
         for (const auto index : ir.GetOutputAttributes()) {
-            if (index == Attribute::Index::PointSize) {
-                is_point_size_declared = true;
-            } else if (index == Attribute::Index::ClipDistances0123 ||
-                       index == Attribute::Index::ClipDistances4567) {
+            if (index == Attribute::Index::ClipDistances0123 ||
+                index == Attribute::Index::ClipDistances4567) {
                 is_clip_distances_declared = true;
             }
         }
 
         std::vector<Id> members;
         members.push_back(t_float4);
-        if (is_point_size_declared) {
+        if (ir.UsesPointSize()) {
             members.push_back(t_float);
         }
         if (is_clip_distances_declared) {
@@ -466,7 +463,7 @@ private:
         position_index = MemberDecorateBuiltIn(spv::BuiltIn::Position, "position", true);
         point_size_index =
-            MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", is_point_size_declared);
+            MemberDecorateBuiltIn(spv::BuiltIn::PointSize, "point_size", ir.UsesPointSize());
         clip_distances_index = MemberDecorateBuiltIn(spv::BuiltIn::ClipDistance, "clip_distances",
                                                      is_clip_distances_declared);
@@ -712,7 +709,8 @@ private:
         case Attribute::Index::Position:
             return AccessElement(t_out_float, per_vertex, position_index, abuf->GetElement());
-        case Attribute::Index::PointSize:
+        case Attribute::Index::LayerViewportPointSize:
+            UNIMPLEMENTED_IF(abuf->GetElement() != 3);
             return AccessElement(t_out_float, per_vertex, point_size_index);
         case Attribute::Index::ClipDistances0123:
             return AccessElement(t_out_float, per_vertex, clip_distances_index,
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 24f022cc04..77151a24be 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -95,12 +95,8 @@ const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::Image
 const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg,
                                         Tegra::Shader::ImageType type) {
     const Node image_register{GetRegister(reg)};
-    const Node base_image{
+    const auto [base_image, cbuf_index, cbuf_offset]{
         TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
-    const auto cbuf{std::get_if<CbufNode>(&*base_image)};
-    const auto cbuf_offset_imm{std::get_if<ImmediateNode>(&*cbuf->GetOffset())};
-    const auto cbuf_offset{cbuf_offset_imm->GetValue()};
-    const auto cbuf_index{cbuf->GetIndex()};
     const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
 
     // If this image has already been used, return the existing mapping.
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 80fc0ccfc1..ab207a33be 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -297,18 +297,13 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeB
     const auto addr_register{GetRegister(instr.gmem.gpr)};
     const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
 
-    const Node base_address{
-        TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))};
-    const auto cbuf = std::get_if<CbufNode>(&*base_address);
-    ASSERT(cbuf != nullptr);
-    const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset());
-    ASSERT(cbuf_offset_imm != nullptr);
-    const auto cbuf_offset = cbuf_offset_imm->GetValue();
-
-    bb.push_back(
-        Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
-
-    const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
+    const auto [base_address, index, offset] =
+        TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
+    ASSERT(base_address != nullptr);
+
+    bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
+
+    const GlobalMemoryBase descriptor{index, offset};
     const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
     auto& usage = entry->second;
     if (is_write) {
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index cb480be9bd..e1ee5c190d 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -269,7 +269,13 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
             LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
         }
 
-        WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
+        const Node4 components = GetTldsCode(instr, texture_type, is_array);
+
+        if (instr.tlds.fp32_flag) {
+            WriteTexsInstructionFloat(bb, instr, components);
+        } else {
+            WriteTexsInstructionHalfFloat(bb, instr, components);
+        }
         break;
     }
     default:
@@ -302,13 +308,9 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
 const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type,
                                             bool is_array, bool is_shadow) {
     const Node sampler_register = GetRegister(reg);
-    const Node base_sampler =
+    const auto [base_sampler, cbuf_index, cbuf_offset] =
         TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
-    const auto cbuf = std::get_if<CbufNode>(&*base_sampler);
-    const auto cbuf_offset_imm = std::get_if<ImmediateNode>(&*cbuf->GetOffset());
-    ASSERT(cbuf_offset_imm != nullptr);
-    const auto cbuf_offset = cbuf_offset_imm->GetValue();
-    const auto cbuf_index = cbuf->GetIndex();
+    ASSERT(base_sampler != nullptr);
     const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);
 
     // If this sampler has already been used, return the existing mapping.
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 5994bfc4e9..78bd1cf1e5 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -61,7 +61,16 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
     const auto [entry, is_new] = used_cbufs.try_emplace(index);
     entry->second.MarkAsUsedIndirect();
 
-    const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset));
+    const Node final_offset = [&]() {
+        // Attempt to inline constant buffer without a variable offset. This is done to allow
+        // tracking LDC calls.
+        if (const auto gpr = std::get_if<GprNode>(&*node)) {
+            if (gpr->GetIndex() == Register::ZeroIndex) {
+                return Immediate(offset);
+            }
+        }
+        return Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset));
+    }();
 
     return MakeNode<CbufNode>(index, final_offset);
 }
@@ -89,6 +98,22 @@ Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_addres
 }
 
 Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
+    if (index == Attribute::Index::LayerViewportPointSize) {
+        switch (element) {
+        case 0:
+            UNIMPLEMENTED();
+            break;
+        case 1:
+            uses_layer = true;
+            break;
+        case 2:
+            uses_viewport_index = true;
+            break;
+        case 3:
+            uses_point_size = true;
+            break;
+        }
+    }
     if (index == Attribute::Index::ClipDistances0123 ||
         index == Attribute::Index::ClipDistances4567) {
         const auto clip_index =
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 6145f0a707..126c781369 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -115,6 +115,18 @@ public:
         return static_cast<std::size_t>(coverage_end * sizeof(u64));
     }
 
+    bool UsesLayer() const {
+        return uses_layer;
+    }
+
+    bool UsesViewportIndex() const {
+        return uses_viewport_index;
+    }
+
+    bool UsesPointSize() const {
+        return uses_point_size;
+    }
+
     bool HasPhysicalAttributes() const {
         return uses_physical_attributes;
     }
@@ -316,7 +328,7 @@ private:
     void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
                               Node op_c, Node imm_lut, bool sets_cc);
 
-    Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
+    std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
 
     std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
@@ -346,6 +358,9 @@ private:
     std::set<Image> used_images;
     std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
     std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
+    bool uses_layer{};
+    bool uses_viewport_index{};
+    bool uses_point_size{};
     bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
 
     Tegra::Shader::Header header;
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index fc957d980e..dc132a4a3d 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -32,39 +32,44 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
     }
     return {};
 }
-} // namespace
+} // Anonymous namespace
 
-Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const {
+std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
+                                               s64 cursor) const {
     if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
-        // Cbuf found, but it has to be immediate
-        return std::holds_alternative<ImmediateNode>(*cbuf->GetOffset()) ? tracked : nullptr;
+        // Constant buffer found, test if it's an immediate
+        const auto offset = cbuf->GetOffset();
+        if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
+            return {tracked, cbuf->GetIndex(), immediate->GetValue()};
+        }
+        return {};
     }
     if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
         if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
-            return nullptr;
+            return {};
         }
         // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
         // register that it uses as operand
         const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
         if (!source) {
-            return nullptr;
+            return {};
        }
         return TrackCbuf(source, code, new_cursor);
     }
     if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
         for (std::size_t i = 0; i < operation->GetOperandsCount(); ++i) {
-            if (const auto found = TrackCbuf((*operation)[i], code, cursor)) {
-                // Cbuf found in operand
+            if (auto found = TrackCbuf((*operation)[i], code, cursor); std::get<0>(found)) {
+                // Cbuf found in operand.
                 return found;
             }
         }
-        return nullptr;
+        return {};
     }
     if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
         const auto& conditional_code = conditional->GetCode();
         return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
     }
-    return nullptr;
+    return {};
 }
 
 std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 7a0fdb19bc..6af9044cad 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -75,9 +75,12 @@ MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs)
 
     // Linear Surface check
     if (!params.is_tiled) {
-        if (std::tie(params.width, params.height, params.pitch) ==
-            std::tie(rhs.width, rhs.height, rhs.pitch)) {
-            return MatchStructureResult::FullMatch;
+        if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) {
+            if (params.width == rhs.width) {
+                return MatchStructureResult::FullMatch;
+            } else {
+                return MatchStructureResult::SemiMatch;
+            }
         }
         return MatchStructureResult::None;
     }
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index 8ba386a8ac..bcce8d8634 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -200,8 +200,9 @@ public:
         modification_tick = tick;
     }
 
-    void MarkAsRenderTarget(const bool is_target) {
+    void MarkAsRenderTarget(const bool is_target, const u32 index) {
         this->is_target = is_target;
+        this->index = index;
     }
 
     void MarkAsPicked(const bool is_picked) {
@@ -221,6 +222,10 @@ public:
         return is_target;
     }
 
+    u32 GetRenderTarget() const {
+        return index;
+    }
+
     bool IsRegistered() const {
         return is_registered;
     }
@@ -307,10 +312,13 @@ private:
         return view;
     }
 
+    static constexpr u32 NO_RT = 0xFFFFFFFF;
+
     bool is_modified{};
     bool is_target{};
     bool is_registered{};
     bool is_picked{};
+    u32 index{NO_RT};
     u64 modification_tick{};
 };
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 9c56e2b4f1..fd54724513 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -290,12 +290,19 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co
 
 std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
                                                     bool uncompressed) const {
-    const bool tiled{as_host_size ? false : is_tiled};
     const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
     const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
     const u32 depth{is_layered ? 1U : GetMipDepth(level)};
-    return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth,
-                                         GetMipBlockHeight(level), GetMipBlockDepth(level));
+    if (is_tiled) {
+        return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height,
+                                             depth, GetMipBlockHeight(level),
+                                             GetMipBlockDepth(level));
+    } else if (as_host_size || IsBuffer()) {
+        return GetBytesPerPixel() * width * height * depth;
+    } else {
+        // Linear Texture Case
+        return pitch * height * depth;
+    }
 }
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index c9e72531a5..7f9623c623 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -133,11 +133,11 @@ public:
                                       regs.zeta.memory_layout.block_depth,
                                       regs.zeta.memory_layout.type)};
         auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true);
         if (depth_buffer.target)
-            depth_buffer.target->MarkAsRenderTarget(false);
+            depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
         depth_buffer.target = surface_view.first;
         depth_buffer.view = surface_view.second;
         if (depth_buffer.target)
-            depth_buffer.target->MarkAsRenderTarget(true);
+            depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT);
         return surface_view.second;
     }
@@ -167,11 +167,11 @@ public:
         auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
                                        preserve_contents, true);
         if (render_targets[index].target)
-            render_targets[index].target->MarkAsRenderTarget(false);
+            render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
         render_targets[index].target = surface_view.first;
         render_targets[index].view = surface_view.second;
         if (render_targets[index].target)
-            render_targets[index].target->MarkAsRenderTarget(true);
+            render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index));
         return surface_view.second;
     }
@@ -191,7 +191,7 @@ public:
         if (depth_buffer.target == nullptr) {
            return;
         }
-        depth_buffer.target->MarkAsRenderTarget(false);
+        depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
        depth_buffer.target = nullptr;
         depth_buffer.view = nullptr;
     }
@@ -200,7 +200,7 @@ public:
         if (render_targets[index].target == nullptr) {
             return;
         }
-        render_targets[index].target->MarkAsRenderTarget(false);
+        render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
         render_targets[index].target = nullptr;
         render_targets[index].view = nullptr;
     }
@@ -270,6 +270,16 @@ protected:
     // and reading it from a sepparate buffer.
     virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
 
+    void ManageRenderTargetUnregister(TSurface& surface) {
+        auto& maxwell3d = system.GPU().Maxwell3D();
+        const u32 index = surface->GetRenderTarget();
+        if (index == DEPTH_RT) {
+            maxwell3d.dirty_flags.zeta_buffer = true;
+        } else {
+            maxwell3d.dirty_flags.color_buffer.set(index, true);
+        }
+    }
+
     void Register(TSurface surface) {
         const GPUVAddr gpu_addr = surface->GetGpuAddr();
         const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
@@ -294,6 +304,9 @@ protected:
         if (guard_render_targets && surface->IsProtected()) {
             return;
         }
+        if (!guard_render_targets && surface->IsRenderTarget()) {
+            ManageRenderTargetUnregister(surface);
+        }
         const GPUVAddr gpu_addr = surface->GetGpuAddr();
         const CacheAddr cache_ptr = surface->GetCacheAddr();
         const std::size_t size = surface->GetSizeInBytes();
@@ -649,15 +662,6 @@ private:
             }
             return {current_surface, *view};
         }
-        // The next case is unsafe, so if we r in accurate GPU, just skip it
-        if (Settings::values.use_accurate_gpu_emulation) {
-            return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
-                                  MatchTopologyResult::FullMatch);
-        }
-        // This is the case the texture is a part of the parent.
-        if (current_surface->MatchesSubTexture(params, gpu_addr)) {
-            return RebuildSurface(current_surface, params, is_render);
-        }
     } else {
         // If there are many overlaps, odds are they are subtextures of the candidate
         // surface. We try to construct a new surface based on the candidate parameters,
@@ -793,6 +797,9 @@ private:
     static constexpr u64 registry_page_size{1 << registry_page_bits};
     std::unordered_map<CacheAddr, std::vector<TSurface>> registry;
 
+    static constexpr u32 DEPTH_RT = 8;
+    static constexpr u32 NO_RT = 0xFFFFFFFF;
+
     // The L1 Cache is used for fast texture lookup before checking the overlaps
     // This avoids calculating size and other stuffs.
     std::unordered_map<CacheAddr, TSurface> l1_cache;
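The recurring pattern in the utils.h changes above, staging `const GLuint*` pointers in `Push()` and dereferencing them only in `Bind()`, can be sketched without any OpenGL dependency. Below is a minimal, hypothetical illustration of that deferred-dereference idea using only the standard library; `Handle`, `HandlePushBuffer`, and `FakeBindRange` are names invented for this example and are not part of the yuzu codebase or the GL API.

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

using Handle = std::uint32_t;

// Stand-in for glBindBuffersRange: just prints what would be bound.
void FakeBindRange(std::size_t first, const std::vector<Handle>& handles,
                   const std::vector<std::ptrdiff_t>& offsets) {
    for (std::size_t i = 0; i < handles.size(); ++i) {
        std::cout << "slot " << first + i << " -> handle " << handles[i] << " offset "
                  << offsets[i] << '\n';
    }
}

class HandlePushBuffer {
public:
    void Setup(std::size_t first_) {
        first = first_;
        handle_pointers.clear();
        offsets.clear();
    }

    // Stage a *pointer* to a handle; the pointed-to value may still change
    // between Push() and Bind().
    void Push(const Handle* handle, std::ptrdiff_t offset) {
        handle_pointers.push_back(handle);
        offsets.push_back(offset);
    }

    void Bind() {
        const std::size_t count = handle_pointers.size();
        assert(count == offsets.size());
        if (count == 0) {
            return;
        }
        // Dereference only now, so handles assigned after Push() are picked up.
        handles.resize(count);
        std::transform(handle_pointers.begin(), handle_pointers.end(), handles.begin(),
                       [](const Handle* pointer) { return *pointer; });
        FakeBindRange(first, handles, offsets);
    }

private:
    std::size_t first = 0;
    std::vector<const Handle*> handle_pointers;
    std::vector<Handle> handles;
    std::vector<std::ptrdiff_t> offsets;
};

int main() {
    Handle uniform_buffer = 0; // not yet created when it is pushed
    HandlePushBuffer push_buffer;
    push_buffer.Setup(3);
    push_buffer.Push(&uniform_buffer, 0);
    uniform_buffer = 42; // handle assigned after Push(), before Bind()
    push_buffer.Bind();  // prints: slot 3 -> handle 42 offset 0
}
```

Deferring the dereference means a buffer that only receives its final handle between `Push()` and `Bind()` is still bound correctly, which appears to be the property the push-buffer changes above rely on.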