diff options
| -rw-r--r-- | .build.yml | 9 | ||||
| -rw-r--r-- | LICENSE | 661 | ||||
| -rw-r--r-- | README.md | 15 | ||||
| -rw-r--r-- | buffers.go | 92 | ||||
| -rw-r--r-- | buffers_test.go | 40 | ||||
| -rw-r--r-- | errors.go | 12 | ||||
| -rw-r--r-- | git.go | 2 | ||||
| -rw-r--r-- | go.mod | 3 | ||||
| -rw-r--r-- | hash.go | 40 | ||||
| -rw-r--r-- | hash_test.go | 33 | ||||
| -rw-r--r-- | headers.go | 6 | ||||
| -rw-r--r-- | ident.go | 123 | ||||
| -rw-r--r-- | ident_test.go | 64 | ||||
| -rw-r--r-- | loose.go | 88 | ||||
| -rw-r--r-- | obj.go | 119 | ||||
| -rw-r--r-- | obj_blob.go | 33 | ||||
| -rw-r--r-- | obj_commit.go | 124 | ||||
| -rw-r--r-- | obj_tag.go | 145 | ||||
| -rw-r--r-- | obj_tree.go | 110 | ||||
| -rw-r--r-- | objects_test.go | 186 | ||||
| -rw-r--r-- | pack_idx.go | 286 | ||||
| -rw-r--r-- | pack_pack.go | 473 | ||||
| -rw-r--r-- | pack_test.go | 214 | ||||
| -rw-r--r-- | refs.go | 94 | ||||
| -rw-r--r-- | repo.go | 82 | ||||
| -rw-r--r-- | repo_test.go | 122 |
26 files changed, 3176 insertions, 0 deletions
diff --git a/.build.yml b/.build.yml new file mode 100644 index 00000000..ae584e7a --- /dev/null +++ b/.build.yml @@ -0,0 +1,9 @@ +image: alpine/edge +packages: + - golangci-lint + - go +tasks: + - build: | + cd furgit + go test + golangci-lint run . diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..be3f7b28 --- /dev/null +++ b/LICENSE @@ -0,0 +1,661 @@ + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. + + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +<https://www.gnu.org/licenses/>. diff --git a/README.md b/README.md new file mode 100644 index 00000000..0673ff0d --- /dev/null +++ b/README.md @@ -0,0 +1,15 @@ +# Furgit + +[](https://builds.sr.ht/~runxiyu/furgit?) +[](https://pkg.go.dev/git.sr.ht/~runxiyu/furgit) + +Furgit is a fast implementation of Git in pure Go, extracted from +[an internal package of Lindenii Villosa](https://codeberg.org/lindenii/villosa/src/branch/master/villosad/internal/common/git). + +## Development + +- [Main SourceHut repository](https://git.sr.ht/~runxiyu/furgit) + and [public inbox](https://lists.sr.ht/~runxiyu/public-inbox) + for [patches](https://git-send-email.io/) and discussions +- [GitHub mirror](https://github.com/runxiyu/furgit) + (issues and PRs are welcome) diff --git a/buffers.go b/buffers.go new file mode 100644 index 00000000..ab3a3b76 --- /dev/null +++ b/buffers.go @@ -0,0 +1,92 @@ +package furgit + +import "sync" + +const ( + defaultBodyCap = 32 * 1024 + maxPooledBody = 8 << 20 +) + +type borrowedBody struct { + buf []byte + pooled bool +} + +var bodyPool = sync.Pool{ + New: func() any { + buf := make([]byte, 0, defaultBodyCap) + return &buf + }, +} + +func borrowBody(capHint int) borrowedBody { + if capHint < defaultBodyCap { + capHint = defaultBodyCap + } + buf := bodyPool.Get().(*[]byte) + if cap(*buf) < capHint { + bodyPool.Put(buf) + newBuf := make([]byte, 0, capHint) + return borrowedBody{buf: newBuf, pooled: false} + } + slice := (*buf)[:0] + return borrowedBody{buf: slice, pooled: true} +} + +func borrowedFromOwned(buf []byte) borrowedBody { + return borrowedBody{buf: buf} +} + +func (b *borrowedBody) Resize(n int) { + if n < 0 { + n = 0 + } + b.ensureCapacity(n) + b.buf = b.buf[:n] +} + +func (b *borrowedBody) Append(src []byte) { + if len(src) == 0 { + return + } + start := len(b.buf) + b.ensureCapacity(start + len(src)) + b.buf = b.buf[:start+len(src)] + copy(b.buf[start:], src) +} + +func (b *borrowedBody) Bytes() []byte { + return b.buf +} + +func (b *borrowedBody) Release() { + if b.buf == nil { + return + } + if b.pooled && cap(b.buf) <= maxPooledBody { + tmp := b.buf[:0] + bodyPool.Put(&tmp) + } + b.buf = nil + b.pooled = false +} + +func (b *borrowedBody) ensureCapacity(needed int) { + if cap(b.buf) >= needed { + return + } + old := b.buf + wasPooled := b.pooled + newCap := cap(b.buf) * 2 + if newCap < needed { + newCap = needed + } + newBuf := make([]byte, len(b.buf), newCap) + copy(newBuf, b.buf) + b.buf = newBuf + b.pooled = false + if wasPooled && cap(old) <= maxPooledBody { + tmp := old[:0] + bodyPool.Put(&tmp) + } +} diff --git a/buffers_test.go b/buffers_test.go new file mode 100644 index 00000000..aae431e5 --- /dev/null +++ b/buffers_test.go @@ -0,0 +1,40 @@ +package furgit + +import "testing" + +func TestBorrowBodyResizeAndAppend(t *testing.T) { + b := borrowBody(1) + defer b.Release() + + if cap(b.buf) < defaultBodyCap { + t.Fatalf("expected capacity >= %d, got %d", defaultBodyCap, cap(b.buf)) + } + + b.Append([]byte("alpha")) + b.Append([]byte("beta")) + if got := string(b.Bytes()); got != "alphabeta" { + t.Fatalf("unexpected contents: %q", got) + } + + b.Resize(3) + if got := string(b.Bytes()); got != "alp" { + t.Fatalf("resize shrink mismatch: %q", got) + } + + b.Resize(8) + if len(b.Bytes()) != 8 { + t.Fatalf("expected len 8 after grow, got %d", len(b.Bytes())) + } + if prefix := string(b.Bytes()[:3]); prefix != "alp" { + t.Fatalf("prefix lost after grow: %q", prefix) + } +} + +func TestBorrowBodyRelease(t *testing.T) { + b := borrowBody(defaultBodyCap / 2) + b.Append([]byte("data")) + b.Release() + if b.buf != nil { + t.Fatal("expected buffer cleared after release") + } +} diff --git a/errors.go b/errors.go new file mode 100644 index 00000000..675586f5 --- /dev/null +++ b/errors.go @@ -0,0 +1,12 @@ +package furgit + +import "errors" + +var ( + // ErrInvalidObject indicates malformed serialized data. + ErrInvalidObject = errors.New("furgit: invalid object encoding") + // ErrInvalidRef indicates malformed refs. + ErrInvalidRef = errors.New("furgit: invalid ref") + // ErrNotFound indicates missing refs/objects. + ErrNotFound = errors.New("furgit: not found") +) @@ -0,0 +1,2 @@ +// Package furgit implements low-level Git operations. +package furgit @@ -0,0 +1,3 @@ +module git.sr.ht/~runxiyu/furgit + +go 1.18 diff --git a/hash.go b/hash.go new file mode 100644 index 00000000..f03866f1 --- /dev/null +++ b/hash.go @@ -0,0 +1,40 @@ +package furgit + +import ( + "crypto/sha1" + "encoding/hex" + "fmt" +) + +// To change the hash algorithm you probably only need to change these two lines... + +const HashSize = sha1.Size + +var newHash = sha1.Sum + +// Hash represents a Git object identifier. +type Hash [HashSize]byte + +// ParseHash converts a hex string into an Hash. +func ParseHash(s string) (Hash, error) { + var id Hash + if len(s) != HashSize*2 { + return id, fmt.Errorf("furgit: invalid hash length %d", len(s)) + } + data, err := hex.DecodeString(s) + if err != nil { + return id, fmt.Errorf("furgit: decode hash: %w", err) + } + copy(id[:], data) + return id, nil +} + +// String renders the ID as hex. +func (id Hash) String() string { + return hex.EncodeToString(id[:]) +} + +// Bytes returns a mutable copy of the underlying bytes. +func (id Hash) Bytes() []byte { + return append([]byte(nil), id[:]...) +} diff --git a/hash_test.go b/hash_test.go new file mode 100644 index 00000000..95660d88 --- /dev/null +++ b/hash_test.go @@ -0,0 +1,33 @@ +package furgit + +import "testing" + +func TestParseHashValidAndInvalid(t *testing.T) { + const hex40 = "0123456789abcdef0123456789abcdef01234567" + id, err := ParseHash(hex40) + if err != nil { + t.Fatalf("ParseHash returned error: %v", err) + } + if got := id.String(); got != hex40 { + t.Fatalf("unexpected String result: %q", got) + } + + if _, err := ParseHash("abcd"); err == nil { + t.Fatal("expected error for short hash") + } + if _, err := ParseHash("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"); err == nil { + t.Fatal("expected error for non-hex input") + } +} + +func TestHashBytesCopiesUnderlyingData(t *testing.T) { + var id Hash + for i := range id { + id[i] = byte(i) + } + orig := id.Bytes() + orig[0] ^= 0xff + if id[0] == orig[0] { + t.Fatal("Bytes should return a copy") + } +} diff --git a/headers.go b/headers.go new file mode 100644 index 00000000..0efc5398 --- /dev/null +++ b/headers.go @@ -0,0 +1,6 @@ +package furgit + +type ExtraHeader struct { + Key string + Value []byte +} diff --git a/ident.go b/ident.go new file mode 100644 index 00000000..48230182 --- /dev/null +++ b/ident.go @@ -0,0 +1,123 @@ +package furgit + +import ( + "bytes" + "errors" + "fmt" + "math" + "strconv" + "strings" + "time" +) + +// Ident models an author/committer identity together with its timestamp +// and timezone offset, mirroring the fields that appear in Git objects. +type Ident struct { + Name []byte + Email []byte + WhenUnix int64 + OffsetMinutes int32 +} + +// parseIdent parses an identity line from the canonical Git format: +// "Name <email> 123456789 +0000". +func parseIdent(line []byte) (*Ident, error) { + lt := bytes.IndexByte(line, '<') + if lt < 0 { + return nil, errors.New("furgit: ident: missing opening <") + } + gtRel := bytes.IndexByte(line[lt+1:], '>') + if gtRel < 0 { + return nil, errors.New("furgit: ident: missing closing >") + } + gt := lt + 1 + gtRel + nameBytes := append([]byte(nil), line[:lt]...) + emailBytes := append([]byte(nil), line[lt+1:gt]...) + + rest := line[gt+1:] + if len(rest) == 0 || rest[0] != ' ' { + return nil, errors.New("furgit: ident: missing timestamp separator") + } + rest = rest[1:] + sp := bytes.IndexByte(rest, ' ') + if sp < 0 { + return nil, errors.New("furgit: ident: missing timezone separator") + } + whenStr := string(rest[:sp]) + when, err := strconv.ParseInt(whenStr, 10, 64) + if err != nil { + return nil, fmt.Errorf("furgit: ident: invalid timestamp: %w", err) + } + + tz := rest[sp+1:] + if len(tz) < 5 { + return nil, errors.New("furgit: ident: invalid timezone encoding") + } + sign := 1 + switch tz[0] { + case '-': + sign = -1 + case '+': + default: + return nil, errors.New("furgit: ident: invalid timezone sign") + } + + hh, err := strconv.Atoi(string(tz[1:3])) + if err != nil { + return nil, fmt.Errorf("furgit: ident: invalid timezone hours: %w", err) + } + mm, err := strconv.Atoi(string(tz[3:5])) + if err != nil { + return nil, fmt.Errorf("furgit: ident: invalid timezone minutes: %w", err) + } + if hh < 0 || hh > 23 { + return nil, errors.New("furgit: ident: invalid timezone hours range") + } + if mm < 0 || mm > 59 { + return nil, errors.New("furgit: ident: invalid timezone minutes range") + } + total := int64(hh)*60 + int64(mm) + if total > math.MaxInt32 { + return nil, errors.New("furgit: ident: timezone overflow") + } + offset := int32(total) + if sign < 0 { + offset = -offset + } + + return &Ident{ + Name: nameBytes, + Email: emailBytes, + WhenUnix: when, + OffsetMinutes: offset, + }, nil +} + +// Serialize renders an Ident into canonical Git format. +func (id Ident) Serialize() []byte { + var b strings.Builder + b.Grow(len(id.Name) + len(id.Email) + 32) + b.Write(id.Name) + b.WriteString(" <") + b.Write(id.Email) + b.WriteString("> ") + b.WriteString(strconv.FormatInt(id.WhenUnix, 10)) + b.WriteByte(' ') + + offset := id.OffsetMinutes + sign := '+' + if offset < 0 { + sign = '-' + offset = -offset + } + hh := offset / 60 + mm := offset % 60 + fmt.Fprintf(&b, "%c%02d%02d", sign, hh, mm) + return []byte(b.String()) +} + +// When returns the timestamp as time.Time using the embedded offset. +func (id Ident) When() time.Time { + loc := time.FixedZone("git", int(id.OffsetMinutes)*60) + return time.Unix(id.WhenUnix, 0).In(loc) +} diff --git a/ident_test.go b/ident_test.go new file mode 100644 index 00000000..032dee4c --- /dev/null +++ b/ident_test.go @@ -0,0 +1,64 @@ +package furgit + +import ( + "strings" + "testing" +) + +func TestParseIdentRoundTrip(t *testing.T) { + line := []byte("Alice Example <alice@example.com> 1700000000 -0700") + id, err := parseIdent(line) + if err != nil { + t.Fatalf("parseIdent error: %v", err) + } + if got := string(id.Email); got != "alice@example.com" { + t.Fatalf("email mismatch: %q", got) + } + serialized := string(id.Serialize()) + if !strings.Contains(serialized, "alice@example.com") { + t.Fatalf("Serialize missing email: %q", serialized) + } + when := id.When() + if when.Unix() != 1700000000 { + t.Fatalf("When unix mismatch: %d", when.Unix()) + } + if _, offset := when.Zone(); offset != -7*3600 { + t.Fatalf("When offset mismatch: %d", offset) + } +} + +func TestParseIdentInvalidInputs(t *testing.T) { + cases := []string{ + "MissingEmail 1700000000 +0000", + "Name <email> notanumber +0000", + "Name <email> 1700000000 123", + } + for _, tc := range cases { + if _, err := parseIdent([]byte(tc)); err == nil { + t.Fatalf("expected error for %q", tc) + } + } +} + +func TestIdentSerializeUsesCanonicalSpacing(t *testing.T) { + id := Ident{ + Name: []byte("Bob"), + Email: []byte("bob@example.com"), + WhenUnix: 1000, + OffsetMinutes: 90, + } + got := string(id.Serialize()) + if !strings.Contains(got, "Bob <bob@example.com>") { + t.Fatalf("unexpected serialize output: %q", got) + } + if !strings.HasSuffix(got, "+0130") { + t.Fatalf("expected timezone in +0130 form: %q", got) + } + loc := id.When() + if loc.Unix() != 1000 { + t.Fatalf("When unix mismatch: %d", loc.Unix()) + } + if _, offset := loc.Zone(); offset != 90*60 { + t.Fatalf("When offset mismatch: %d", offset) + } +} diff --git a/loose.go b/loose.go new file mode 100644 index 00000000..78c483c7 --- /dev/null +++ b/loose.go @@ -0,0 +1,88 @@ +package furgit + +import ( + "bytes" + "compress/zlib" + "fmt" + "io" + "os" + "path/filepath" + "strconv" +) + +func loosePath(id Hash) string { + hex := id.String() + return filepath.Join("objects", hex[:2], hex[2:]) +} + +func (repo *Repository) looseRead(id Hash) (Object, error) { + ty, body, err := repo.looseReadTyped(id) + if err != nil { + return nil, err + } + return parseObjectBody(ty, id, body) +} + +func (repo *Repository) looseReadTyped(id Hash) (ObjType, []byte, error) { + path := repo.repoPath(loosePath(id)) + f, err := os.Open(path) + if err != nil { + if os.IsNotExist(err) { + return ObjInvalid, nil, ErrNotFound + } + return ObjInvalid, nil, err + } + defer func() { _ = f.Close() }() + + zr, err := zlib.NewReader(f) + if err != nil { + return ObjInvalid, nil, err + } + defer func() { _ = zr.Close() }() + + raw, err := io.ReadAll(zr) + if err != nil { + return ObjInvalid, nil, err + } + + nul := bytes.IndexByte(raw, 0) + if nul < 0 { + return ObjInvalid, nil, ErrInvalidObject + } + + header := raw[:nul] + body := raw[nul+1:] + + space := bytes.IndexByte(header, ' ') + if space < 0 { + return ObjInvalid, nil, ErrInvalidObject + } + tyStr := string(header[:space]) + var ty ObjType + switch tyStr { + case "blob": + ty = ObjBlob + case "tree": + ty = ObjTree + case "commit": + ty = ObjCommit + case "tag": + ty = ObjTag + default: + return ObjInvalid, nil, ErrInvalidObject + } + expect := header[space+1:] + size, err := strconv.Atoi(string(expect)) + if err != nil { + return ObjInvalid, nil, fmt.Errorf("furgit: loose: size parse: %w", err) + } + if size != len(body) { + return ObjInvalid, nil, ErrInvalidObject + } + if !verifyRawObject(raw, id) { + return ObjInvalid, nil, ErrInvalidObject + } + + out := append([]byte(nil), body...) + return ty, out, nil +} @@ -0,0 +1,119 @@ +package furgit + +import ( + "bytes" + "errors" + "fmt" + "strconv" +) + +// ObjType mirrors Git's object type tags. +type ObjType uint8 + +const ( + ObjInvalid ObjType = 0 + ObjCommit ObjType = 1 + ObjTree ObjType = 2 + ObjBlob ObjType = 3 + ObjTag ObjType = 4 + ObjFuture ObjType = 5 + ObjOfsDelta ObjType = 6 + ObjRefDelta ObjType = 7 +) + +const ( + objNameBlob = "blob" + objNameTree = "tree" + objNameCommit = "commit" + objNameTag = "tag" +) + +// Object describes any Git object variant. +type Object interface { + ObjType() ObjType +} + +type objectBase struct { + Hash Hash +} + +func computeRawHash(data []byte) Hash { + var id Hash + sum := newHash(data) + copy(id[:], sum[:]) + return id +} + +func headerForType(ty ObjType, body []byte) ([]byte, error) { + var tyStr string + switch ty { + case ObjBlob: + tyStr = objNameBlob + case ObjTree: + tyStr = objNameTree + case ObjCommit: + tyStr = objNameCommit + case ObjTag: + tyStr = objNameTag + case ObjInvalid, ObjFuture, ObjOfsDelta, ObjRefDelta: + return nil, fmt.Errorf("furgit: object: unsupported type %d", ty) + default: + return nil, fmt.Errorf("furgit: object: unsupported type %d", ty) + } + size := strconv.Itoa(len(body)) + var buf bytes.Buffer + buf.Grow(len(tyStr) + len(size) + 1) + buf.WriteString(tyStr) + buf.WriteByte(' ') + buf.WriteString(size) + buf.WriteByte(0) + return buf.Bytes(), nil +} + +func verifyRawObject(buf []byte, want Hash) bool { + return computeRawHash(buf) == want +} + +func verifyTypedObject(ty ObjType, body []byte, want Hash) bool { + header, err := headerForType(ty, body) + if err != nil { + return false + } + raw := make([]byte, len(header)+len(body)) + copy(raw, header) + copy(raw[len(header):], body) + return computeRawHash(raw) == want +} + +func parseObjectBody(ty ObjType, id Hash, body []byte) (Object, error) { + switch ty { + case ObjBlob: + return parseBlob(id, body) + case ObjTree: + return parseTree(id, body) + case ObjCommit: + return parseCommit(id, body) + case ObjTag: + return parseTag(id, body) + case ObjInvalid, ObjFuture, ObjOfsDelta, ObjRefDelta: + return nil, fmt.Errorf("furgit: object: unsupported type %d", ty) + default: + return nil, fmt.Errorf("furgit: object: unknown type %d", ty) + } +} + +// ReadObject resolves an ID by consulting loose then packed storage. +func (repo *Repository) ReadObject(id Hash) (Object, error) { + obj, err := repo.looseRead(id) + if err == nil { + return obj, nil + } + if !errors.Is(err, ErrNotFound) { + return nil, err + } + obj, err = repo.packRead(id) + if errors.Is(err, ErrNotFound) { + return nil, ErrInvalidObject + } + return obj, err +} diff --git a/obj_blob.go b/obj_blob.go new file mode 100644 index 00000000..eda0ca5f --- /dev/null +++ b/obj_blob.go @@ -0,0 +1,33 @@ +package furgit + +// Blob represents the contents of a Git blob. +type Blob struct { + objectBase + + Data []byte +} + +// ObjType allows Blob to satisfy the Object interface. +func (*Blob) ObjType() ObjType { + return ObjBlob +} + +func parseBlob(id Hash, body []byte) (*Blob, error) { + data := append([]byte(nil), body...) + return &Blob{ + objectBase: objectBase{Hash: id}, + Data: data, + }, nil +} + +// Serialize renders the full "blob size\\0body" representation. +func (b *Blob) Serialize() ([]byte, error) { + header, err := headerForType(ObjBlob, b.Data) + if err != nil { + return nil, err + } + raw := make([]byte, len(header)+len(b.Data)) + copy(raw, header) + copy(raw[len(header):], b.Data) + return raw, nil +} diff --git a/obj_commit.go b/obj_commit.go new file mode 100644 index 00000000..100c6b35 --- /dev/null +++ b/obj_commit.go @@ -0,0 +1,124 @@ +package furgit + +import ( + "bytes" + "errors" + "fmt" +) + +// Commit mirrors the structure of a Git commit object. +type Commit struct { + objectBase + + Tree Hash + Parents []Hash + Author Ident + Committer Ident + Message []byte + ExtraHeaders []ExtraHeader +} + +// ObjType allows Commit to satisfy the Object interface. +func (*Commit) ObjType() ObjType { + return ObjCommit +} + +func parseCommit(id Hash, body []byte) (*Commit, error) { + c := new(Commit) + c.objectBase = objectBase{Hash: id} + i := 0 + for i < len(body) { + rel := bytes.IndexByte(body[i:], '\n') + if rel < 0 { + return nil, errors.New("furgit: commit: missing newline") + } + line := body[i : i+rel] + i += rel + 1 + if len(line) == 0 { + break + } + + switch { + case bytes.HasPrefix(line, []byte("tree ")): + treeID, err := ParseHash(string(line[5:])) + if err != nil { + return nil, fmt.Errorf("furgit: commit: tree: %w", err) + } + c.Tree = treeID + case bytes.HasPrefix(line, []byte("parent ")): + parent, err := ParseHash(string(line[7:])) + if err != nil { + return nil, fmt.Errorf("furgit: commit: parent: %w", err) + } + c.Parents = append(c.Parents, parent) + case bytes.HasPrefix(line, []byte("author ")): + idt, err := parseIdent(line[7:]) + if err != nil { + return nil, fmt.Errorf("furgit: commit: author: %w", err) + } + c.Author = *idt + case bytes.HasPrefix(line, []byte("committer ")): + idt, err := parseIdent(line[10:]) + if err != nil { + return nil, fmt.Errorf("furgit: commit: committer: %w", err) + } + c.Committer = *idt + case bytes.HasPrefix(line, []byte("gpgsig ")), bytes.HasPrefix(line, []byte("gpgsig-sha256 ")): + // TODO: handle this + for i < len(body) { + nextRel := bytes.IndexByte(body[i:], '\n') + if nextRel < 0 { + return nil, errors.New("furgit: commit: unterminated gpgsig") + } + if body[i] != ' ' { + break + } + i += nextRel + 1 + } + default: + key, value, found := bytes.Cut(line, []byte{' '}) + if !found { + return nil, errors.New("furgit: commit: malformed header") + } + c.ExtraHeaders = append(c.ExtraHeaders, ExtraHeader{Key: string(key), Value: value}) + } + } + + if i > len(body) { + return nil, ErrInvalidObject + } + + c.Message = append([]byte(nil), body[i:]...) + return c, nil +} + +func commitBody(c *Commit) []byte { + var buf bytes.Buffer + fmt.Fprintf(&buf, "tree %s\n", c.Tree.String()) + for _, p := range c.Parents { + fmt.Fprintf(&buf, "parent %s\n", p.String()) + } + buf.WriteString("author ") + buf.Write(c.Author.Serialize()) + buf.WriteByte('\n') + buf.WriteString("committer ") + buf.Write(c.Committer.Serialize()) + buf.WriteByte('\n') + buf.WriteByte('\n') + buf.Write(c.Message) + + return buf.Bytes() +} + +// Serialize renders a Commit into canonical Git format. +func (c *Commit) Serialize() ([]byte, error) { + body := commitBody(c) + header, err := headerForType(ObjCommit, body) + if err != nil { + return nil, err + } + raw := make([]byte, len(header)+len(body)) + copy(raw, header) + copy(raw[len(header):], body) + return raw, nil +} diff --git a/obj_tag.go b/obj_tag.go new file mode 100644 index 00000000..348afd48 --- /dev/null +++ b/obj_tag.go @@ -0,0 +1,145 @@ +package furgit + +import ( + "bytes" + "errors" + "fmt" +) + +// Tag models an annotated Git tag object. +type Tag struct { + objectBase + + Target Hash + TargetType ObjType + Name []byte + Tagger *Ident + Message []byte +} + +// ObjType allows Tag to satisfy the Object interface. +func (*Tag) ObjType() ObjType { + return ObjTag +} + +// parseTag parses a tag object body. +func parseTag(id Hash, body []byte) (*Tag, error) { + t := new(Tag) + t.objectBase = objectBase{Hash: id} + i := 0 + var haveTarget, haveType bool + + for i < len(body) { + rel := bytes.IndexByte(body[i:], '\n') + if rel < 0 { + return nil, errors.New("furgit: tag: missing newline") + } + line := body[i : i+rel] + i += rel + 1 + if len(line) == 0 { + break + } + + switch { + case bytes.HasPrefix(line, []byte("object ")): + hash, err := ParseHash(string(line[7:])) + if err != nil { + return nil, fmt.Errorf("furgit: tag: object: %w", err) + } + t.Target = hash + haveTarget = true + case bytes.HasPrefix(line, []byte("type ")): + switch string(line[5:]) { + case "commit": + t.TargetType = ObjCommit + case "tree": + t.TargetType = ObjTree + case "blob": + t.TargetType = ObjBlob + case "tag": + t.TargetType = ObjTag + default: + t.TargetType = ObjInvalid + return nil, errors.New("furgit: tag: unknown target type") + } + haveType = true + case bytes.HasPrefix(line, []byte("tag ")): + t.Name = append([]byte(nil), line[4:]...) + case bytes.HasPrefix(line, []byte("tagger ")): + idt, err := parseIdent(line[7:]) + if err != nil { + return nil, fmt.Errorf("furgit: tag: tagger: %w", err) + } + t.Tagger = idt + case bytes.HasPrefix(line, []byte("gpgsig ")), bytes.HasPrefix(line, []byte("gpgsig-sha256 ")): + for i < len(body) { + nextRel := bytes.IndexByte(body[i:], '\n') + if nextRel < 0 { + return nil, errors.New("furgit: tag: unterminated gpgsig") + } + if body[i] != ' ' { + break + } + i += nextRel + 1 + } + default: + // ignore unknown headers + } + } + + if !haveTarget || !haveType { + return nil, errors.New("furgit: tag: missing required headers") + } + + t.Message = append([]byte(nil), body[i:]...) + return t, nil +} + +func tagBody(t *Tag) ([]byte, error) { + var buf bytes.Buffer + fmt.Fprintf(&buf, "object %s\n", t.Target.String()) + buf.WriteString("type ") + switch t.TargetType { + case ObjCommit: + buf.WriteString("commit") + case ObjTree: + buf.WriteString("tree") + case ObjBlob: + buf.WriteString("blob") + case ObjTag: + buf.WriteString("tag") + case ObjInvalid, ObjFuture, ObjOfsDelta, ObjRefDelta: + return nil, fmt.Errorf("furgit: tag: invalid target type %d", t.TargetType) + default: + return nil, fmt.Errorf("furgit: tag: invalid target type %d", t.TargetType) + } + buf.WriteByte('\n') + buf.WriteString("tag ") + buf.Write(t.Name) + buf.WriteByte('\n') + if t.Tagger != nil { + buf.WriteString("tagger ") + buf.Write(t.Tagger.Serialize()) + buf.WriteByte('\n') + } + buf.WriteByte('\n') + buf.Write(t.Message) + + return buf.Bytes(), nil +} + +// Serialize renders a Tag into canonical Git format. +func (t *Tag) Serialize() ([]byte, error) { + body, err := tagBody(t) + if err != nil { + return nil, err + } + header, err := headerForType(ObjTag, body) + if err != nil { + return nil, err + } + raw := make([]byte, len(header)+len(body)) + copy(raw, header) + copy(raw[len(header):], body) + return raw, nil +} diff --git a/obj_tree.go b/obj_tree.go new file mode 100644 index 00000000..c78fd375 --- /dev/null +++ b/obj_tree.go @@ -0,0 +1,110 @@ +package furgit + +import ( + "bytes" + "errors" + "fmt" + "strconv" +) + +// Tree represents a Git tree object. +type Tree struct { + objectBase + + Entries []TreeEntry +} + +// TreeEntry represents a single entry in a Git tree. +type TreeEntry struct { + Mode uint32 + Name []byte + ID Hash +} + +// ObjType allows Tree to satisfy the Object interface. +func (*Tree) ObjType() ObjType { + return ObjTree +} + +// parseTree decodes a tree body. +func parseTree(id Hash, body []byte) (*Tree, error) { + var entries []TreeEntry + i := 0 + for i < len(body) { + space := bytes.IndexByte(body[i:], ' ') + if space < 0 { + return nil, errors.New("furgit: tree: missing mode terminator") + } + modeBytes := body[i : i+space] + i += space + 1 + + nul := bytes.IndexByte(body[i:], 0) + if nul < 0 { + return nil, errors.New("furgit: tree: missing name terminator") + } + nameBytes := body[i : i+nul] + i += nul + 1 + + if i+HashSize > len(body) { + return nil, errors.New("furgit: tree: truncated child hash") + } + var child Hash + copy(child[:], body[i:i+HashSize]) + i += HashSize + + mode, err := strconv.ParseUint(string(modeBytes), 8, 32) + if err != nil { + return nil, fmt.Errorf("furgit: tree: parse mode: %w", err) + } + + entry := TreeEntry{ + Mode: uint32(mode), + Name: append([]byte(nil), nameBytes...), + ID: child, + } + entries = append(entries, entry) + } + + return &Tree{ + objectBase: objectBase{Hash: id}, + Entries: entries, + }, nil +} + +// treeBody builds the entry list for a tree without the Git header. +func treeBody(t *Tree) []byte { + var bodyLen int + for _, e := range t.Entries { + mode := strconv.FormatUint(uint64(e.Mode), 8) + bodyLen += len(mode) + 1 + len(e.Name) + 1 + HashSize + } + + body := make([]byte, bodyLen) + pos := 0 + for _, e := range t.Entries { + mode := strconv.FormatUint(uint64(e.Mode), 8) + pos += copy(body[pos:], []byte(mode)) + body[pos] = ' ' + pos++ + pos += copy(body[pos:], e.Name) + body[pos] = 0 + pos++ + pos += copy(body[pos:], e.ID[:]) + } + + return body +} + +// Serialize renders a Tree into canonical Git format. +func (t *Tree) Serialize() ([]byte, error) { + body := treeBody(t) + header, err := headerForType(ObjTree, body) + if err != nil { + return nil, err + } + + raw := make([]byte, len(header)+len(body)) + copy(raw, header) + copy(raw[len(header):], body) + return raw, nil +} diff --git a/objects_test.go b/objects_test.go new file mode 100644 index 00000000..b5b3c5b9 --- /dev/null +++ b/objects_test.go @@ -0,0 +1,186 @@ +package furgit + +import ( + "bytes" + "fmt" + "path/filepath" + "strings" + "testing" +) + +func mustHash(t *testing.T, hex string) Hash { + id, err := ParseHash(hex) + if err != nil { + t.Fatalf("ParseHash failed: %v", err) + } + return id +} + +func hashWithByte(fill byte) Hash { + var h Hash + for i := range h { + h[i] = fill + fill++ + } + return h +} + +func TestLoosePathUsesExpectedLayout(t *testing.T) { + id := mustHash(t, "0123456789abcdef0123456789abcdef01234567") + expect := filepath.Join("objects", "01", "23456789abcdef0123456789abcdef01234567") + if got := loosePath(id); got != expect { + t.Fatalf("unexpected loose path: %q", got) + } +} + +func TestParseBlobAndSerialize(t *testing.T) { + data := []byte("blob payload") + id := hashWithByte(0x10) + blob, err := parseBlob(id, data) + if err != nil { + t.Fatalf("parseBlob error: %v", err) + } + if !bytes.Equal(blob.Data, data) { + t.Fatalf("blob data mismatch: %q", blob.Data) + } + if blob.Hash != id { + t.Fatalf("blob hash mismatch: %v", blob.Hash) + } + raw, err := blob.Serialize() + if err != nil { + t.Fatalf("Serialize error: %v", err) + } + header, err := headerForType(ObjBlob, data) + if err != nil { + t.Fatalf("headerForType: %v", err) + } + want := append(append([]byte(nil), header...), data...) + if !bytes.Equal(raw, want) { + t.Fatalf("serialized blob mismatch") + } +} + +func TestParseTreeAndSerialize(t *testing.T) { + entries := []TreeEntry{ + {Mode: 0100644, Name: []byte("file.txt"), ID: hashWithByte(0x20)}, + {Mode: 040000, Name: []byte("subdir"), ID: hashWithByte(0x30)}, + } + body := treeBody(&Tree{Entries: entries}) + id := hashWithByte(0x40) + tree, err := parseTree(id, body) + if err != nil { + t.Fatalf("parseTree error: %v", err) + } + if len(tree.Entries) != len(entries) { + t.Fatalf("expected %d entries, got %d", len(entries), len(tree.Entries)) + } + for i := range entries { + if tree.Entries[i].Mode != entries[i].Mode || !bytes.Equal(tree.Entries[i].Name, entries[i].Name) || tree.Entries[i].ID != entries[i].ID { + t.Fatalf("entry %d mismatch", i) + } + } + serialized, err := (&Tree{Entries: entries}).Serialize() + if err != nil { + t.Fatalf("Serialize error: %v", err) + } + header, _ := headerForType(ObjTree, body) + want := append(append([]byte(nil), header...), body...) + if !bytes.Equal(serialized, want) { + t.Fatalf("serialized tree mismatch") + } +} + +func TestParseCommitWithExtraHeader(t *testing.T) { + treeID := hashWithByte(0x50) + parent := hashWithByte(0x60) + ident := Ident{ + Name: []byte("Alice"), + Email: []byte("alice@example.com"), + WhenUnix: 1700000000, + OffsetMinutes: -420, + } + var buf bytes.Buffer + fmt.Fprintf(&buf, "tree %s\n", treeID.String()) + fmt.Fprintf(&buf, "parent %s\n", parent.String()) + buf.WriteString("author ") + buf.Write(ident.Serialize()) + buf.WriteByte('\n') + buf.WriteString("committer ") + buf.Write(ident.Serialize()) + buf.WriteByte('\n') + buf.WriteString("extra data\n\nMessage body\n") + commit, err := parseCommit(hashWithByte(0x70), buf.Bytes()) + if err != nil { + t.Fatalf("parseCommit error: %v", err) + } + if commit.Tree != treeID { + t.Fatalf("tree mismatch") + } + if len(commit.Parents) != 1 || commit.Parents[0] != parent { + t.Fatalf("parent mismatch: %+v", commit.Parents) + } + if string(commit.Message) != "Message body\n" { + t.Fatalf("message mismatch: %q", commit.Message) + } + if len(commit.ExtraHeaders) != 1 || commit.ExtraHeaders[0].Key != "extra" || !bytes.Equal(commit.ExtraHeaders[0].Value, []byte("data")) { + t.Fatalf("extra headers mismatch: %+v", commit.ExtraHeaders) + } + + roundTrip := &Commit{ + Tree: treeID, + Parents: []Hash{parent}, + Author: ident, + Committer: ident, + Message: []byte("Message body\n"), + } + raw, err := roundTrip.Serialize() + if err != nil { + t.Fatalf("Serialize error: %v", err) + } + if !strings.Contains(string(raw), "tree "+treeID.String()) { + t.Fatalf("serialized commit missing tree header") + } +} + +func TestParseTagAndSerialize(t *testing.T) { + target := hashWithByte(0x80) + tagger := &Ident{ + Name: []byte("Tagger"), + Email: []byte("tagger@example.com"), + WhenUnix: 1234, + OffsetMinutes: 0, + } + var buf bytes.Buffer + buf.WriteString("object ") + buf.WriteString(target.String()) + buf.WriteByte('\n') + buf.WriteString("type commit\n") + buf.WriteString("tag v1.0\n") + buf.WriteString("tagger ") + buf.Write(tagger.Serialize()) + buf.WriteString("\n\nannotated tag\n") + body := append([]byte(nil), buf.Bytes()...) + tag, err := parseTag(hashWithByte(0x90), body) + if err != nil { + t.Fatalf("parseTag error: %v", err) + } + if tag.Target != target || tag.TargetType != ObjCommit { + t.Fatalf("tag target mismatch") + } + if tag.Tagger == nil { + t.Fatalf("tagger missing in body %q", string(body)) + } + if !bytes.Contains(tag.Tagger.Name, []byte("Tagger")) { + t.Fatalf("tagger name mismatch: %q", tag.Tagger.Name) + } + if string(tag.Name) != "v1.0" { + t.Fatalf("tag name mismatch: %q", tag.Name) + } + serialized, err := tag.Serialize() + if err != nil { + t.Fatalf("Serialize error: %v", err) + } + if !strings.Contains(string(serialized), "tag v1.0") { + t.Fatalf("serialized tag missing name header") + } +} diff --git a/pack_idx.go b/pack_idx.go new file mode 100644 index 00000000..eec2f3b9 --- /dev/null +++ b/pack_idx.go @@ -0,0 +1,286 @@ +package furgit + +import ( + "bytes" + "errors" + "os" + "path/filepath" + "strings" + "sync" + "syscall" +) + +const ( + idxMagic = 0xff744f63 + idxVersion2 = 2 +) + +type packIndex struct { + repo *Repository + idxRel string + packPath string + + loadOnce sync.Once + loadErr error + + numObjects int + fanout []byte + names []byte + crcs []byte + offset32 []byte + offset64 []byte + data []byte + + closeOnce sync.Once +} + +func (pi *packIndex) Close() error { + if pi == nil { + return nil + } + var closeErr error + pi.closeOnce.Do(func() { + if len(pi.data) > 0 { + if err := syscall.Munmap(pi.data); closeErr == nil { + closeErr = err + } + pi.data = nil + pi.fanout = nil + pi.names = nil + pi.crcs = nil + pi.offset32 = nil + pi.offset64 = nil + pi.numObjects = 0 + } + }) + return closeErr +} + +func (pi *packIndex) ensureLoaded() error { + pi.loadOnce.Do(func() { + pi.loadErr = pi.load() + }) + return pi.loadErr +} + +func (pi *packIndex) load() error { + if pi.repo == nil { + return ErrInvalidObject + } + f, err := os.Open(pi.repo.repoPath(pi.idxRel)) + if err != nil { + return err + } + stat, err := f.Stat() + if err != nil { + _ = f.Close() + return err + } + if stat.Size() < 8+256*4 { + _ = f.Close() + return ErrInvalidObject + } + region, err := syscall.Mmap( + int(f.Fd()), + 0, + int(stat.Size()), + syscall.PROT_READ, + syscall.MAP_PRIVATE, + ) + if err != nil { + _ = f.Close() + return err + } + err = f.Close() + if err != nil { + _ = syscall.Munmap(region) + return err + } + err = pi.parse(region) + if err != nil { + _ = syscall.Munmap(region) + return err + } + pi.data = region + return nil +} + +func (r *Repository) packIndexes() ([]*packIndex, error) { + r.packIdxOnce.Do(func() { + r.packIdx, r.packIdxErr = r.loadPackIndexes() + }) + return r.packIdx, r.packIdxErr +} + +func (repo *Repository) loadPackIndexes() ([]*packIndex, error) { + dir := filepath.Join(repo.rootPath, "objects", "pack") + entries, err := os.ReadDir(dir) + if err != nil { + if os.IsNotExist(err) { + return nil, ErrNotFound + } + return nil, err + } + + idxs := make([]*packIndex, 0, len(entries)) + for _, entry := range entries { + if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".idx") { + continue + } + rel := filepath.Join("objects", "pack", entry.Name()) + packRel := strings.TrimSuffix(rel, ".idx") + ".pack" + idxs = append(idxs, &packIndex{ + repo: repo, + idxRel: rel, + packPath: packRel, + }) + } + if len(idxs) == 0 { + return nil, ErrNotFound + } + return idxs, nil +} + +func (pi *packIndex) parse(buf []byte) error { + if len(buf) < 8+256*4 { + return ErrInvalidObject + } + if readBE32(buf[0:4]) != idxMagic { + return ErrInvalidObject + } + if readBE32(buf[4:8]) != idxVersion2 { + return ErrInvalidObject + } + + const fanoutBytes = 256 * 4 + fanoutStart := 8 + fanoutEnd := fanoutStart + fanoutBytes + if fanoutEnd > len(buf) { + return ErrInvalidObject + } + pi.fanout = buf[fanoutStart:fanoutEnd] + nobj := int(readBE32(pi.fanout[len(pi.fanout)-4:])) + + namesStart := fanoutEnd + namesEnd := namesStart + nobj*HashSize + if namesEnd > len(buf) { + return ErrInvalidObject + } + + crcStart := namesEnd + crcEnd := crcStart + nobj*4 + if crcEnd > len(buf) { + return ErrInvalidObject + } + + off32Start := crcEnd + off32End := off32Start + nobj*4 + if off32End > len(buf) { + return ErrInvalidObject + } + + pi.offset32 = buf[off32Start:off32End] + + off64Start := off32End + trailerStart := len(buf) - 2*HashSize + if trailerStart < off64Start { + return ErrInvalidObject + } + if (trailerStart-off64Start)%8 != 0 { + return ErrInvalidObject + } + off64End := trailerStart + pi.offset64 = buf[off64Start:off64End] + + pi.numObjects = nobj + pi.names = buf[namesStart:namesEnd] + pi.crcs = buf[crcStart:crcEnd] + return nil +} + +func readBE32(b []byte) uint32 { + _ = b[3] + return uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3]) +} + +func readBE64(b []byte) uint64 { + _ = b[7] + return (uint64(b[0]) << 56) | (uint64(b[1]) << 48) | + (uint64(b[2]) << 40) | (uint64(b[3]) << 32) | + (uint64(b[4]) << 24) | (uint64(b[5]) << 16) | + (uint64(b[6]) << 8) | uint64(b[7]) +} + +func (pi *packIndex) fanoutEntry(i int) uint32 { + if len(pi.fanout) == 0 { + return 0 + } + entries := len(pi.fanout) / 4 + if i < 0 || i >= entries { + return 0 + } + start := i * 4 + return readBE32(pi.fanout[start : start+4]) +} + +func (pi *packIndex) offset(idx int) (uint64, error) { + start := idx * 4 + word := readBE32(pi.offset32[start : start+4]) + if word&0x80000000 == 0 { + return uint64(word), nil + } + pos := int(word & 0x7fffffff) + entries := len(pi.offset64) / 8 + if pos < 0 || pos >= entries { + return 0, errors.New("furgit: pack: corrupt 64-bit offset table") + } + base := pos * 8 + return readBE64(pi.offset64[base : base+8]), nil +} + +func (pi *packIndex) lookup(id Hash) (PackLocation, error) { + err := pi.ensureLoaded() + if err != nil { + return PackLocation{}, err + } + first := int(id[0]) + var lo int + if first > 0 { + lo = int(pi.fanoutEntry(first - 1)) + } + hi := int(pi.fanoutEntry(first)) + idx, found := bsearchHash(pi.names, HashSize, lo, hi, id) + if !found { + return PackLocation{}, ErrNotFound + } + ofs, err := pi.offset(idx) + if err != nil { + return PackLocation{}, err + } + return PackLocation{ + PackPath: pi.packPath, + Offset: ofs, + }, nil +} + +func bsearchHash(names []byte, stride, lo, hi int, want Hash) (int, bool) { + for lo < hi { + mid := lo + (hi-lo)/2 + cmp := compareHash(names, stride, mid, want[:]) + if cmp == 0 { + return mid, true + } + if cmp > 0 { + hi = mid + } else { + lo = mid + 1 + } + } + return lo, false +} + +func compareHash(names []byte, stride, idx int, want []byte) int { + base := idx * stride + end := base + stride + return bytes.Compare(names[base:end], want) +} diff --git a/pack_pack.go b/pack_pack.go new file mode 100644 index 00000000..20974669 --- /dev/null +++ b/pack_pack.go @@ -0,0 +1,473 @@ +package furgit + +import ( + "bytes" + "compress/zlib" + "encoding/binary" + "errors" + "fmt" + "io" + "math" + "os" + "sync" + "syscall" +) + +const ( + packMagic = 0x5041434b + packVersion2 = 2 +) + +// PackLocation identifies the path to a pack file and an offset inside it. +type PackLocation struct { + PackPath string + Offset uint64 +} + +func (repo *Repository) packRead(id Hash) (Object, error) { + loc, err := repo.packIndexFind(id) + if err != nil { + return nil, err + } + return repo.packReadAt(loc, id) +} + +func (repo *Repository) packIndexFind(id Hash) (PackLocation, error) { + idxs, err := repo.packIndexes() + if err != nil { + return PackLocation{}, err + } + for _, idx := range idxs { + loc, err := idx.lookup(id) + if errors.Is(err, ErrNotFound) { + continue + } + if err != nil { + return PackLocation{}, err + } + return loc, nil + } + return PackLocation{}, ErrNotFound +} + +func (repo *Repository) packReadAt(loc PackLocation, want Hash) (Object, error) { + ty, body, err := repo.packBodyResolveAtLocation(loc) + if err != nil { + return nil, err + } + data := body.Bytes() + if !verifyTypedObject(ty, data, want) { + body.Release() + return nil, ErrInvalidObject + } + obj, err := parseObjectBody(ty, want, data) + body.Release() + return obj, err +} + +func (repo *Repository) packBodyResolveAtLocation(loc PackLocation) (ObjType, borrowedBody, error) { + pf, err := repo.packFile(loc.PackPath) + if err != nil { + return ObjInvalid, borrowedBody{}, err + } + return repo.packBodyResolveWithin(pf, loc.Offset) +} + +func packHeaderRead(r io.Reader) (ObjType, int, error) { + var b [1]byte + _, err := io.ReadFull(r, b[:]) + if err != nil { + return ObjInvalid, 0, err + } + ty := ObjType((b[0] >> 4) & 0x07) + size := int(b[0] & 0x0f) + shift := 4 + for (b[0] & 0x80) != 0 { + _, err = io.ReadFull(r, b[:]) + if err != nil { + return ObjInvalid, 0, err + } + size |= int(b[0]&0x7f) << shift + shift += 7 + if (b[0] & 0x80) == 0 { + break + } + } + return ty, size, nil +} + +func packSectionInflate(r io.Reader, sizeHint int) (borrowedBody, error) { + zr, err := zlib.NewReader(r) + if err != nil { + return borrowedBody{}, err + } + defer func() { _ = zr.Close() }() + + if sizeHint > 0 { + body := borrowBody(sizeHint) + body.Resize(sizeHint) + _, err := io.ReadFull(zr, body.Bytes()) + if err != nil { + body.Release() + return borrowedBody{}, err + } + var extra [1]byte + _, err = zr.Read(extra[:]) + if err != io.EOF { + body.Release() + if err == nil { + return borrowedBody{}, ErrInvalidObject + } + return borrowedBody{}, err + } + return body, nil + } + + body := borrowBody(defaultBodyCap) + var scratch [32 * 1024]byte + for { + n, err := zr.Read(scratch[:]) + if n > 0 { + body.Append(scratch[:n]) + } + if err == io.EOF { + return body, nil + } + if err != nil { + body.Release() + return borrowedBody{}, err + } + } +} + +func (repo *Repository) packDeltaResolveOfs(pf *packFile, deltaOffset uint64, r io.Reader) (ObjType, borrowedBody, error) { + dist, err := packDeltaReadOfsDistance(r) + if err != nil { + return ObjInvalid, borrowedBody{}, err + } + var baseOfs uint64 + if deltaOffset > dist { + baseOfs = deltaOffset - dist + } + if baseOfs == 0 { + return ObjInvalid, borrowedBody{}, ErrInvalidObject + } + ty, body, err := repo.packBodyResolveWithin(pf, baseOfs) + if err != nil { + return ObjInvalid, borrowedBody{}, err + } + delta, err := packSectionInflate(r, 0) + if err != nil { + body.Release() + return ObjInvalid, borrowedBody{}, err + } + out, err := packDeltaApply(body, delta) + delta.Release() + body.Release() + if err != nil { + out.Release() + return ObjInvalid, borrowedBody{}, err + } + return ty, out, nil +} + +func packDeltaReadOfsDistance(r io.Reader) (uint64, error) { + var b [1]byte + _, err := io.ReadFull(r, b[:]) + if err != nil { + return 0, err + } + dist := uint64(b[0] & 0x7f) + for (b[0] & 0x80) != 0 { + _, err = io.ReadFull(r, b[:]) + if err != nil { + return 0, err + } + dist = ((dist + 1) << 7) + uint64(b[0]&0x7f) + } + return dist, nil +} + +func (repo *Repository) packBodyResolveByID(id Hash) (ObjType, borrowedBody, error) { + loc, err := repo.packIndexFind(id) + if err == nil { + return repo.packBodyResolveAtLocation(loc) + } + if !errors.Is(err, ErrNotFound) { + return ObjInvalid, borrowedBody{}, err + } + ty, body, err := repo.looseReadTyped(id) + if err != nil { + return ObjInvalid, borrowedBody{}, err + } + return ty, borrowedFromOwned(body), nil +} + +func (repo *Repository) packBodyResolveWithin(pf *packFile, ofs uint64) (ObjType, borrowedBody, error) { + r, err := pf.cursor(ofs) + if err != nil { + return ObjInvalid, borrowedBody{}, err + } + ty, size, err := packHeaderRead(r) + if err != nil { + return ObjInvalid, borrowedBody{}, err + } + + switch ty { + case ObjCommit, ObjTree, ObjBlob, ObjTag: + body, err := packSectionInflate(r, size) + return ty, body, err + case ObjRefDelta: + var base Hash + _, err := io.ReadFull(r, base[:]) + if err != nil { + return ObjInvalid, borrowedBody{}, err + } + delta, err := packSectionInflate(r, 0) + if err != nil { + return ObjInvalid, borrowedBody{}, err + } + bt, body, err := repo.packBodyResolveByID(base) + if err != nil { + delta.Release() + return ObjInvalid, borrowedBody{}, err + } + out, err := packDeltaApply(body, delta) + delta.Release() + body.Release() + if err != nil { + out.Release() + return ObjInvalid, borrowedBody{}, err + } + return bt, out, nil + case ObjOfsDelta: + return repo.packDeltaResolveOfs(pf, ofs, r) + case ObjInvalid, ObjFuture: + return ObjInvalid, borrowedBody{}, ErrInvalidObject + default: + return ObjInvalid, borrowedBody{}, ErrInvalidObject + } +} + +func packDeltaApply(base, delta borrowedBody) (borrowedBody, error) { + pos := 0 + baseBytes := base.Bytes() + deltaBytes := delta.Bytes() + srcSize, err := packVarintRead(deltaBytes, &pos) + if err != nil { + return borrowedBody{}, err + } + dstSize, err := packVarintRead(deltaBytes, &pos) + if err != nil { + return borrowedBody{}, err + } + if srcSize != len(baseBytes) { + return borrowedBody{}, ErrInvalidObject + } + out := borrowBody(dstSize) + out.Resize(dstSize) + outBytes := out.Bytes() + outPos := 0 + + for pos < len(deltaBytes) { + op := deltaBytes[pos] + pos++ + switch { + case op&0x80 != 0: + off := 0 + n := 0 + if op&0x01 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return borrowedBody{}, ErrInvalidObject + } + off |= int(deltaBytes[pos]) + pos++ + } + if op&0x02 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return borrowedBody{}, ErrInvalidObject + } + off |= int(deltaBytes[pos]) << 8 + pos++ + } + if op&0x04 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return borrowedBody{}, ErrInvalidObject + } + off |= int(deltaBytes[pos]) << 16 + pos++ + } + if op&0x08 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return borrowedBody{}, ErrInvalidObject + } + off |= int(deltaBytes[pos]) << 24 + pos++ + } + if op&0x10 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return borrowedBody{}, ErrInvalidObject + } + n |= int(deltaBytes[pos]) + pos++ + } + if op&0x20 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return borrowedBody{}, ErrInvalidObject + } + n |= int(deltaBytes[pos]) << 8 + pos++ + } + if op&0x40 != 0 { + if pos >= len(deltaBytes) { + out.Release() + return borrowedBody{}, ErrInvalidObject + } + n |= int(deltaBytes[pos]) << 16 + pos++ + } + if n == 0 { + n = 0x10000 + } + if off+n > len(baseBytes) || outPos+n > len(outBytes) { + out.Release() + return borrowedBody{}, ErrInvalidObject + } + copy(outBytes[outPos:], baseBytes[off:off+n]) + outPos += n + case op != 0: + n := int(op) + if pos+n > len(deltaBytes) || outPos+n > len(outBytes) { + out.Release() + return borrowedBody{}, ErrInvalidObject + } + copy(outBytes[outPos:], deltaBytes[pos:pos+n]) + pos += n + outPos += n + default: + out.Release() + return borrowedBody{}, ErrInvalidObject + } + } + + if outPos != len(outBytes) { + out.Release() + return borrowedBody{}, ErrInvalidObject + } + return out, nil +} + +func packVarintRead(buf []byte, pos *int) (int, error) { + res := 0 + shift := 0 + for { + if *pos >= len(buf) { + return 0, ErrInvalidObject + } + b := buf[*pos] + *pos++ + res |= int(b&0x7f) << shift + if (b & 0x80) == 0 { + break + } + shift += 7 + } + return res, nil +} + +type packFile struct { + relPath string + size int64 + data []byte + closeMu sync.Once +} + +func openPackFile(absPath, rel string) (*packFile, error) { + f, err := os.Open(absPath) + if err != nil { + return nil, err + } + + stat, err := f.Stat() + if err != nil { + _ = f.Close() + return nil, err + } + if stat.Size() < 12 { + _ = f.Close() + return nil, ErrInvalidObject + } + + header := make([]byte, 12) + _, err = io.ReadFull(f, header) + if err != nil { + _ = f.Close() + return nil, err + } + magic := binary.BigEndian.Uint32(header[:4]) + ver := binary.BigEndian.Uint32(header[4:8]) + if magic != packMagic || ver != packVersion2 { + _ = f.Close() + return nil, ErrInvalidObject + } + + region, err := syscall.Mmap( + int(f.Fd()), + 0, + int(stat.Size()), + syscall.PROT_READ, + syscall.MAP_PRIVATE, + ) + if err != nil { + _ = f.Close() + return nil, err + } + err = f.Close() + if err != nil { + _ = syscall.Munmap(region) + return nil, err + } + return &packFile{ + relPath: rel, + size: stat.Size(), + data: region, + }, nil +} + +func (pf *packFile) Close() error { + if pf == nil { + return nil + } + var closeErr error + pf.closeMu.Do(func() { + if len(pf.data) > 0 { + if err := syscall.Munmap(pf.data); closeErr == nil { + closeErr = err + } + pf.data = nil + } + }) + return closeErr +} + +func (pf *packFile) cursor(ofs uint64) (io.Reader, error) { + if pf == nil { + return nil, ErrInvalidObject + } + if pf.size < 0 { + return nil, ErrInvalidObject + } + if ofs > uint64(pf.size) { + return nil, fmt.Errorf("furgit: pack: offset %d beyond %s", ofs, pf.relPath) + } + if ofs > uint64(math.MaxInt64) { + return nil, fmt.Errorf("furgit: pack: offset %d too large", ofs) + } + return bytes.NewReader(pf.data[ofs:]), nil +} diff --git a/pack_test.go b/pack_test.go new file mode 100644 index 00000000..5661e322 --- /dev/null +++ b/pack_test.go @@ -0,0 +1,214 @@ +package furgit + +import ( + "bytes" + "compress/zlib" + "encoding/binary" + "testing" +) + +func compressBytes(t *testing.T, payload []byte) []byte { + var buf bytes.Buffer + zw := zlib.NewWriter(&buf) + if _, err := zw.Write(payload); err != nil { + t.Fatalf("compress write: %v", err) + } + if err := zw.Close(); err != nil { + t.Fatalf("compress close: %v", err) + } + return buf.Bytes() +} + +func TestPackSectionInflate(t *testing.T) { + payload := []byte("pack payload") + compressed := compressBytes(t, payload) + body, err := packSectionInflate(bytes.NewReader(compressed), len(payload)) + if err != nil { + t.Fatalf("packSectionInflate error: %v", err) + } + if got := string(body.Bytes()); got != string(payload) { + t.Fatalf("unexpected inflated data: %q", got) + } + body.Release() + + body, err = packSectionInflate(bytes.NewReader(compressed), 0) + if err != nil { + t.Fatalf("packSectionInflate streaming error: %v", err) + } + if got := string(body.Bytes()); got != string(payload) { + t.Fatalf("unexpected streaming data: %q", got) + } + body.Release() +} + +func encodePackHeader(ty ObjType, size int) []byte { + first := byte((ty & 0x7) << 4) + first |= byte(size & 0x0f) + size >>= 4 + if size == 0 { + return []byte{first} + } + first |= 0x80 + out := []byte{first} + for size > 0 { + b := byte(size & 0x7f) + size >>= 7 + if size != 0 { + b |= 0x80 + } + out = append(out, b) + } + return out +} + +func TestPackHeaderRead(t *testing.T) { + buf := encodePackHeader(ObjTree, 0x1fff) + ty, size, err := packHeaderRead(bytes.NewReader(buf)) + if err != nil { + t.Fatalf("packHeaderRead error: %v", err) + } + if ty != ObjTree || size != 0x1fff { + t.Fatalf("unexpected header decode ty=%d size=%d", ty, size) + } + if _, _, err := packHeaderRead(bytes.NewReader([]byte{0x80})); err == nil { + t.Fatal("expected error for truncated header") + } +} + +func encodeVarint(value int) []byte { + var out []byte + for { + b := byte(value & 0x7f) + value >>= 7 + if value != 0 { + b |= 0x80 + } + out = append(out, b) + if value == 0 { + break + } + } + return out +} + +func TestPackVarintRead(t *testing.T) { + buf := encodeVarint(0x3456) + pos := 0 + val, err := packVarintRead(buf, &pos) + if err != nil { + t.Fatalf("packVarintRead error: %v", err) + } + if val != 0x3456 { + t.Fatalf("unexpected varint value: %d", val) + } + if pos != len(buf) { + t.Fatalf("expected pos %d, got %d", len(buf), pos) + } + bad := []byte{0x80} + pos = 0 + if _, err := packVarintRead(bad, &pos); err == nil { + t.Fatal("expected error for unterminated varint") + } +} + +func TestPackDeltaApply(t *testing.T) { + base := borrowedFromOwned([]byte("abcdefghij")) + defer base.Release() + deltaBytes := []byte{0x0a, 0x0a, 0x91, 0x00, 0x03, 0x03, 'X', 'Y', 'Z', 0x91, 0x06, 0x04} + delta := borrowedFromOwned(deltaBytes) + defer delta.Release() + out, err := packDeltaApply(base, delta) + if err != nil { + t.Fatalf("packDeltaApply error: %v", err) + } + if got := string(out.Bytes()); got != "abcXYZghij" { + t.Fatalf("unexpected delta output: %q", got) + } + out.Release() +} + +func TestPackDeltaApplyMismatchedBaseSize(t *testing.T) { + base := borrowedFromOwned([]byte("abc")) + defer base.Release() + delta := borrowedFromOwned([]byte{0x04, 0x04}) + defer delta.Release() + if _, err := packDeltaApply(base, delta); err == nil { + t.Fatal("expected error for mismatched base size") + } +} + +func TestPackDeltaReadOfsDistance(t *testing.T) { + dist, err := packDeltaReadOfsDistance(bytes.NewReader([]byte{0x81, 0x01})) + if err != nil { + t.Fatalf("packDeltaReadOfsDistance error: %v", err) + } + if dist != 257 { + t.Fatalf("unexpected distance: %d", dist) + } + if _, err := packDeltaReadOfsDistance(bytes.NewReader([]byte{})); err == nil { + t.Fatal("expected error for empty reader") + } +} + +func TestBsearchHash(t *testing.T) { + h1 := hashWithByte(0x01) + h2 := hashWithByte(0x03) + names := append(append([]byte(nil), h1[:]...), h2[:]...) + idx, found := bsearchHash(names, HashSize, 0, 2, h2) + if !found || idx != 1 { + t.Fatalf("expected to find second hash, idx=%d found=%v", idx, found) + } + _, found = bsearchHash(names, HashSize, 0, 2, hashWithByte(0x05)) + if found { + t.Fatalf("did not expect to find unknown hash") + } +} + +func buildTestPackIndexBuffer(hash Hash, offset uint32) []byte { + fanout := make([]byte, 256*4) + first := int(hash[0]) + for i := 0; i < 256; i++ { + var val uint32 + if i >= first { + val = 1 + } + binary.BigEndian.PutUint32(fanout[i*4:], val) + } + var buf bytes.Buffer + _ = binary.Write(&buf, binary.BigEndian, uint32(idxMagic)) + _ = binary.Write(&buf, binary.BigEndian, uint32(idxVersion2)) + buf.Write(fanout) + buf.Write(hash[:]) + buf.Write(make([]byte, 4)) + off32 := make([]byte, 4) + binary.BigEndian.PutUint32(off32, offset) + buf.Write(off32) + buf.Write(make([]byte, 40)) + return buf.Bytes() +} + +func TestPackIndexParse(t *testing.T) { + h := hashWithByte(0x11) + data := buildTestPackIndexBuffer(h, 0x12345678) + pi := &packIndex{} + if err := pi.parse(data); err != nil { + t.Fatalf("parse error: %v", err) + } + if pi.numObjects != 1 { + t.Fatalf("expected 1 object, got %d", pi.numObjects) + } + if got, err := pi.offset(0); err != nil || got != 0x12345678 { + t.Fatalf("unexpected 32-bit offset or error: %d, %v", got, err) + } +} + +func TestPackIndexOffset64(t *testing.T) { + pi := &packIndex{} + pi.offset32 = make([]byte, 4) + binary.BigEndian.PutUint32(pi.offset32, 0x80000000) + pi.offset64 = make([]byte, 8) + binary.BigEndian.PutUint64(pi.offset64, 0x1_0000_0000) + if got, err := pi.offset(0); err != nil || got != 0x1_0000_0000 { + t.Fatalf("unexpected 64-bit offset or error: %d, %v", got, err) + } +} diff --git a/refs.go b/refs.go new file mode 100644 index 00000000..df3a8303 --- /dev/null +++ b/refs.go @@ -0,0 +1,94 @@ +package furgit + +import ( + "bufio" + "bytes" + "errors" + "os" + "strings" +) + +// ResolveRef resolves a fully qualified ref name to its object ID. +func (repo *Repository) ResolveRef(refname string) (Hash, error) { + id, err := repo.resolveLooseRef(refname) + if err == nil { + return id, nil + } else if !errors.Is(err, ErrNotFound) { + return Hash{}, err + } + + return repo.resolvePackedRef(refname) +} + +func (repo *Repository) resolveLooseRef(refname string) (Hash, error) { + data, err := os.ReadFile(repo.repoPath(refname)) + if err != nil { + if os.IsNotExist(err) { + return Hash{}, ErrNotFound + } + return Hash{}, err + } + line := strings.TrimSpace(string(data)) + id, err := ParseHash(line) + if err != nil { + return Hash{}, err + } + return id, nil +} + +func (repo *Repository) resolvePackedRef(refname string) (Hash, error) { + path := repo.repoPath("packed-refs") + f, err := os.Open(path) + if err != nil { + if os.IsNotExist(err) { + return Hash{}, ErrInvalidObject + } + return Hash{}, err + } + defer func() { _ = f.Close() }() + + want := []byte(refname) + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := scanner.Bytes() + if len(line) == 0 || line[0] == '#' || line[0] == '^' { + continue + } + sp := bytes.IndexByte(line, ' ') + if sp != HashSize*2 { + continue + } + name := line[sp+1:] + if bytes.Equal(name, want) { + hex := string(line[:sp]) + id, err := ParseHash(hex) + if err != nil { + return Hash{}, err + } + return id, nil + } + } + scanErr := scanner.Err() + if scanErr != nil { + return Hash{}, scanErr + } + return Hash{}, ErrInvalidObject +} + +// ResolveHEAD reads HEAD and returns the ref that HEAD points to. +func (repo *Repository) ResolveHEAD() (string, error) { + data, err := os.ReadFile(repo.repoPath("HEAD")) + if err != nil { + return "", err + } + line := strings.TrimSpace(string(data)) + const prefix = "ref: " + if strings.HasPrefix(line, prefix) { + ref := strings.TrimSpace(line[len(prefix):]) + if ref == "" { + return "", ErrInvalidRef + } + return ref, nil + } + return "", ErrInvalidRef +} diff --git a/repo.go b/repo.go new file mode 100644 index 00000000..6560c2b0 --- /dev/null +++ b/repo.go @@ -0,0 +1,82 @@ +package furgit + +import ( + "os" + "path/filepath" + "sync" +) + +// Repository represents the root of a Git repository. +type Repository struct { + rootPath string + + packIdxOnce sync.Once + packIdx []*packIndex + packIdxErr error + + packFiles sync.Map // string, *packFile + closeOnce sync.Once +} + +// OpenRepository opens the repository at the provided path. +func OpenRepository(path string) (*Repository, error) { + fi, err := os.Stat(path) + if err != nil { + return nil, err + } + if !fi.IsDir() { + return nil, ErrInvalidObject + } + return &Repository{rootPath: path}, nil +} + +func (r *Repository) Close() error { + var closeErr error + r.closeOnce.Do(func() { + r.packFiles.Range(func(keya any, pfa any) bool { + key := keya.(string) + pf := pfa.(*packFile) + err := pf.Close() + if err != nil && closeErr == nil { + closeErr = err + } + r.packFiles.Delete(key) + return true + }) + if len(r.packIdx) > 0 { + for _, idx := range r.packIdx { + err := idx.Close() + if err != nil && closeErr == nil { + closeErr = err + } + } + } + }) + return closeErr +} + +// Root returns the repository root path. +func (r *Repository) Root() string { + return r.rootPath +} + +// repoPath joins the root with a relative path. +func (r *Repository) repoPath(rel string) string { + return filepath.Join(r.rootPath, rel) +} + +func (r *Repository) packFile(rel string) (*packFile, error) { + if pf, ok := r.packFiles.Load(rel); ok { + return pf.(*packFile), nil + } + pf, err := openPackFile(r.repoPath(rel), rel) + if err != nil { + return nil, err + } + actual, loaded := r.packFiles.LoadOrStore(rel, pf) + if loaded { + _ = pf.Close() + return actual.(*packFile), nil + } + return pf, nil +} diff --git a/repo_test.go b/repo_test.go new file mode 100644 index 00000000..22b306c9 --- /dev/null +++ b/repo_test.go @@ -0,0 +1,122 @@ +package furgit + +import ( + "bytes" + "compress/zlib" + "errors" + "fmt" + "os" + "path/filepath" + "testing" +) + +func writeLooseBlob(t *testing.T, root string, data []byte) Hash { + header, err := headerForType(ObjBlob, data) + if err != nil { + t.Fatalf("headerForType: %v", err) + } + raw := append(append([]byte(nil), header...), data...) + id := computeRawHash(raw) + path := filepath.Join(root, loosePath(id)) + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + t.Fatalf("mkdir for loose object: %v", err) + } + var buf bytes.Buffer + zw := zlib.NewWriter(&buf) + if _, err := zw.Write(raw); err != nil { + t.Fatalf("compress: %v", err) + } + if err := zw.Close(); err != nil { + t.Fatalf("close zlib: %v", err) + } + if err := os.WriteFile(path, buf.Bytes(), 0o644); err != nil { + t.Fatalf("write loose object: %v", err) + } + return id +} + +func TestOpenRepositoryAndLooseRead(t *testing.T) { + root := t.TempDir() + repo, err := OpenRepository(root) + if err != nil { + t.Fatalf("OpenRepository error: %v", err) + } + t.Cleanup(func() { _ = repo.Close() }) + + id := writeLooseBlob(t, root, []byte("loose blob payload")) + obj, err := repo.looseRead(id) + if err != nil { + t.Fatalf("looseRead error: %v", err) + } + blob, ok := obj.(*Blob) + if !ok { + t.Fatalf("expected Blob, got %T", obj) + } + if string(blob.Data) != "loose blob payload" { + t.Fatalf("blob data mismatch: %q", blob.Data) + } +} + +func TestResolveRefLooseAndPacked(t *testing.T) { + root := t.TempDir() + repo, err := OpenRepository(root) + if err != nil { + t.Fatalf("OpenRepository error: %v", err) + } + t.Cleanup(func() { _ = repo.Close() }) + + looseID := hashWithByte(0xa0) + loosePath := filepath.Join(root, "refs", "heads") + if err := os.MkdirAll(loosePath, 0o755); err != nil { + t.Fatalf("mkdir refs: %v", err) + } + if err := os.WriteFile(filepath.Join(loosePath, "master"), []byte(looseID.String()+"\n"), 0o644); err != nil { + t.Fatalf("write ref: %v", err) + } + id, err := repo.ResolveRef("refs/heads/master") + if err != nil || id != looseID { + t.Fatalf("ResolveRef loose mismatch (id=%v err=%v)", id, err) + } + + packedID := hashWithByte(0xb0) + packed := fmt.Sprintf("%s refs/tags/v1\n", packedID.String()) + if err := os.WriteFile(filepath.Join(root, "packed-refs"), []byte(packed), 0o644); err != nil { + t.Fatalf("write packed refs: %v", err) + } + id, err = repo.resolvePackedRef("refs/tags/v1") + if err != nil || id != packedID { + t.Fatalf("resolvePackedRef direct mismatch (id=%v err=%v)", id, err) + } + id, err = repo.ResolveRef("refs/tags/v1") + if err != nil || id != packedID { + t.Fatalf("ResolveRef packed mismatch (id=%v err=%v)", id, err) + } + + if _, err := repo.ResolveRef("refs/heads/missing"); !errors.Is(err, ErrInvalidObject) { + t.Fatalf("expected ErrInvalidObject for missing ref, got %v", err) + } +} + +func TestResolveHEAD(t *testing.T) { + root := t.TempDir() + repo, err := OpenRepository(root) + if err != nil { + t.Fatalf("OpenRepository error: %v", err) + } + t.Cleanup(func() { _ = repo.Close() }) + + headPath := filepath.Join(root, "HEAD") + if err := os.WriteFile(headPath, []byte("ref: refs/heads/master\n"), 0o644); err != nil { + t.Fatalf("write HEAD: %v", err) + } + ref, err := repo.ResolveHEAD() + if err != nil || ref != "refs/heads/master" { + t.Fatalf("ResolveHEAD mismatch (ref=%q err=%v)", ref, err) + } + if err := os.WriteFile(headPath, []byte("detached\n"), 0o644); err != nil { + t.Fatalf("write HEAD detached: %v", err) + } + if _, err := repo.ResolveHEAD(); err == nil { + t.Fatal("expected error for detached HEAD") + } +} |
